pdf_page.cpp

Go to the documentation of this file.
00001 /*
00002  * File Name: pdf_page.cpp
00003  */
00004 
00005 /*
00006  * This file is part of uds-plugin-pdf.
00007  *
00008  * uds-plugin-pdf is free software: you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation, either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * uds-plugin-pdf is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program. If not, see <http://www.gnu.org/licenses/>.
00020  */
00021 
00022 /**
00023  * Copyright (C) 2008 iRex Technologies B.V.
00024  * All rights reserved.
00025  */
00026 
00027 #include "utils.h"
00028 #include "log.h"
00029 #include "task.h"
00030 
00031 #include "pdf_page.h"
00032 #include "pdf_doc_controller.h"
00033 #include "pdf_renderer.h"
00034 
00035 namespace pdf
00036 {
00037 
00038 void init_render_area(RenderArea & area)
00039 {
00040     area.x_offset = 0.0f;
00041     area.y_offset = 0.0f;
00042     area.width  = -1.0f;
00043     area.height = -1.0f;
00044 }
00045 
00046 bool is_render_area_valid(const RenderArea & area)
00047 {
00048     return (area.width > 0.0f && area.height > 0.0f);
00049 }
00050 
00051 void get_content_area_in_pixel(const RenderArea & area,
00052                                const int origin_width,
00053                                const int origin_height,
00054                                PluginRectangle & rect)
00055 {
00056     rect.x = static_cast<int>(origin_width * area.x_offset);
00057     rect.y = static_cast<int>(origin_height * area.y_offset);
00058     rect.width = static_cast<int>(origin_width * area.width);
00059     rect.height = static_cast<int>(origin_height * area.height);
00060 }
00061 
00062 void get_std_string_from_text_word(TextWord * word, std::string & result)
00063 {
00064     /*UGooString u_str(*(word->getText()));
00065 
00066     int len = u_str.getLength() * sizeof(Unicode);
00067 
00068     char *res_buf = new char[len];
00069     if (ucs2utf8(reinterpret_cast<char*>(u_str.unicode()), len, res_buf, len))
00070     {
00071         result = std::string(res_buf);
00072     }*/
00073 
00074     GooString * text = word->getText();
00075     if (text != 0)
00076     {
00077         result = std::string(text->getCString());
00078         delete text;
00079     }
00080 }
00081 
00082 PDFPage::PDFPage(int page_num, const PDFRenderAttributes & attr)
00083 {
00084     init();
00085     page_number = page_num;
00086     render_attr = attr;
00087 }
00088 
00089 PDFPage::~PDFPage(void)
00090 {
00091     destroy();
00092 }
00093 
00094 void PDFPage::init()
00095 {
00096     page_number = 0;
00097     bitmap = 0;
00098     links = 0;
00099     text = 0;
00100     doc_controller = 0;
00101     b_lock = false;
00102     render_status = RENDER_STOP;
00103     ref_id = PRERENDER_REF_ID;
00104     init_render_area(content_area);
00105 }
00106 
00107 size_t PDFPage::operator()()
00108 {
00109     return static_cast<size_t>(page_number);
00110 }
00111 
00112 bool PDFPage::operator == (const PDFPage & right)
00113 {
00114     return ((this->page_number == right.page_number) &&
00115             this->render_attr == right.render_attr);
00116 }
00117 
00118 bool PDFPage::operator == (const PDFRenderAttributes & right)
00119 {
00120     return (this->render_attr == right);
00121 }
00122 
00123 void PDFPage::set_render_attr(const PDFRenderAttributes & attr)
00124 {
00125     if (render_attr == attr)
00126     {
00127         return;
00128     }
00129 
00130     render_attr = attr;
00131 }
00132 
00133 void PDFPage::destroy_text()
00134 {
00135     if (text) 
00136     {
00137         delete text;
00138         text = 0;
00139     }
00140 }
00141 
00142 unsigned int PDFPage::destroy_bitmap()
00143 {
00144     unsigned int size = 0;
00145     if (bitmap)
00146     {
00147         size = length();
00148         delete bitmap;
00149         bitmap = 0;
00150     }
00151     return size;
00152 }
00153 
00154 void PDFPage::destroy_links()
00155 {
00156     if (links) 
00157     {
00158         delete links;
00159         links = 0;
00160     }
00161 }
00162 
00163 unsigned int PDFPage::destroy()
00164 {
00165     if (locked())
00166     {
00167         return 0;
00168     }
00169 
00170     if (get_render_status() == RENDER_RUNNING)
00171     {
00172         // if the page is in rendering, cannot delete it
00173         // it won't happen now, because the deleting is executed
00174         // in working thread.
00175         return 0;
00176     }
00177 
00178     // reset the render status
00179     set_render_status(RENDER_STOP);
00180 
00181     destroy_links();
00182     destroy_text();
00183     unsigned int size = destroy_bitmap();
00184 
00185     return size;
00186 }
00187 
00188 TextWordList* PDFPage::get_words_list()
00189 {
00190     TextWordList *words = 0;
00191 
00192     if (text)
00193     {
00194         words = text->makeWordList(gFalse);
00195     }
00196 
00197     return words;
00198 }
00199 
00200 int PDFPage::get_bitmap_width()
00201 {
00202     if (!bitmap)
00203     {
00204         return 0;
00205     }
00206 
00207     return bitmap->getWidth();
00208 }
00209 
00210 int PDFPage::get_bitmap_height()
00211 {
00212     if (!bitmap)
00213     {
00214         return 0;
00215     }
00216 
00217     return bitmap->getHeight();
00218 }
00219 
00220 unsigned int PDFPage::length()
00221 {
00222     if (!bitmap)
00223     {
00224         return 0;
00225     }
00226     return bitmap->getHeight() * bitmap->getRowSize();
00227 }
00228 
00229 unsigned int PDFPage::try_calc_length(const double zoom_value
00230                                       , const double crop_width
00231                                       , const double crop_height)
00232 {
00233     int width = static_cast<int>(crop_width + 1.0f);
00234     int height = static_cast<int>(crop_height + 1.0f);
00235     double zoom = (zoom_value + 1.0f)/ 100.0f;
00236     int row_stride = ((width + 3)>> 2) << 2;
00237 
00238     return static_cast<unsigned int>(row_stride * height * zoom * zoom);
00239 }
00240 
00241 int PDFPage::get_bitmap_row_stride()
00242 {
00243     if (!bitmap)
00244     {
00245         return 0;
00246     }
00247 
00248     return bitmap->getRowSize();
00249 }
00250 
00251 const unsigned char* PDFPage::get_bitmap_data()
00252 {
00253     if (!bitmap)
00254     {
00255         return 0;
00256     }
00257 
00258     return bitmap->getDataPtr();
00259 }
00260 
00261 void PDFPage::update_bitmap(SplashBitmap *m) 
00262 {
00263     if (bitmap == m)
00264     {
00265         return;
00266     }
00267 
00268     bitmap = m;
00269 }
00270 
00271 void PDFPage::update_links(Links *l) 
00272 {
00273     if (links == l)
00274     {
00275         return;
00276     }
00277 
00278     links = l;
00279 }
00280 
00281 void PDFPage::update_text(TextPage *t) 
00282 {
00283     if (text == t)
00284     {
00285         return;
00286     }
00287 
00288     text = t;
00289 }
00290 
00291 SearchResult PDFPage::search(SearchContext &ctx
00292     , PDFSearchPage &results)
00293 {
00294     if (!text)
00295     {
00296         return RES_ERROR;
00297     }
00298 
00299     TextWordList *words = get_words_list();
00300     PluginRangeImpl *result = 0;
00301     SearchResult ret = RES_NOT_FOUND;
00302 
00303     int count = 0;
00304 
00305     // the search operation would update the index of start word
00306     // in the context    
00307     if (ctx.forward)
00308     {
00309         if (ctx.word_cursor < 0)
00310         {
00311             // set the start word to be the first one
00312             ctx.word_cursor = 0;
00313         }
00314 
00315         // search forward
00316         result = search_string_forward(ctx, words);
00317         if (ctx.search_all)
00318         {    
00319             while(result != 0)
00320             {
00321                 results.add(result);
00322                 count++;
00323                 result = search_string_forward(ctx, words);
00324             }
00325         }
00326         else
00327         {
00328             if (result)
00329             {
00330                 results.add(result);
00331                 count++;
00332             }
00333         }
00334 
00335     }
00336     else
00337     {
00338         if (ctx.word_cursor < 0)
00339         {
00340             // set the start word to be the last one
00341             ctx.word_cursor = words->getLength() - 1;
00342         }
00343 
00344         //search backward
00345         result = search_string_backward(ctx, words);
00346         if (ctx.search_all)
00347         {
00348             while(result != 0)
00349             {
00350                 results.add(result);
00351                 count++;
00352                 result = search_string_backward(ctx, words);
00353             }
00354         }
00355         else
00356         {
00357             if (result)
00358             {
00359                 results.add(result);
00360                 count++;
00361             }
00362         }
00363     }
00364 
00365     if (count > 0)
00366     {
00367         ret = RES_OK;
00368     }
00369 
00370     delete words;
00371     return ret;
00372 }
00373 
00374 // Search the destination string forwardly, at the same time update the 
00375 // word index
00376 PluginRangeImpl* PDFPage::search_string_forward(SearchContext &ctx
00377     , TextWordList *words)
00378 {
00379     if (ctx.dst_words.empty())
00380     {
00381         return 0;
00382     }
00383 
00384     // update the current index
00385     PluginRangeImpl *result = 0;
00386 
00387     int cur_word = ctx.word_cursor;
00388     int len      = words->getLength();
00389     if (cur_word >= len)
00390     {
00391         // there is no words left in current page
00392         return 0;
00393     }
00394 
00395     TextWord *word = words->get(cur_word);
00396     if (ctx.char_cursor >= word->getLength())
00397     {
00398         // move to the next word
00399         cur_word++;
00400         // reset char cursor
00401         ctx.char_cursor = 0;
00402     }
00403 
00404     SearchWords words_queue;
00405 
00406     if (ctx.dst_words.size() > 1)
00407     {
00408         // if need searching a words list, we should search by the status
00409         // status of the metching procedure
00410         MatchStatus stat = STATUS_HEADER;
00411 
00412         // index of the word in destination string
00413         int dst_word_index = 0;
00414 
00415         // set the cursor to record the first matched result
00416         // if the suceed words are not matched, roll back to this word
00417         int first_matched_word = cur_word;
00418 
00419         while (cur_word < len)
00420         {
00421             // match success, break out
00422             if (dst_word_index >= static_cast<int>(ctx.dst_words.size()))
00423             {
00424                 break;
00425             }
00426 
00427             word = words->get(cur_word);
00428 
00429             if (word == 0)
00430             {
00431                 // the word is NULL
00432                 ERRORPRINTF("Null word in search");
00433                 break;
00434             }
00435 
00436             //string word_str(word->getText()->getCString());
00437             string word_str;
00438             get_std_string_from_text_word(word, word_str);
00439 
00440             if (word_str.empty())
00441             {
00442                 // the word is empty
00443                 ERRORPRINTF("Empty word in search");
00444                 break;
00445             }
00446 
00447             // record the index of search result
00448             int start_result_idx = 0;
00449 
00450             // set the search context
00451             bool forward = true;
00452             bool match_whole_word = ctx.match_whole_word;
00453             if (!match_whole_word)
00454             {
00455                 if (stat == STATUS_BODY)
00456                 {
00457                     // if it is "body" word in the words list
00458                     // we must compare by whole word
00459                     match_whole_word = true;
00460                 }
00461                 else if (stat == STATUS_HEADER)
00462                 {
00463                     // if it is "header" word in the words list
00464                     // we must compare from the end char
00465                     forward = false;
00466                     // reset the char cursor
00467                     ctx.char_cursor = static_cast<int>(word_str.size()) - 1;
00468                 }
00469             }
00470 
00471             if (compare_string(ctx.dst_words[dst_word_index]
00472                 , word_str
00473                 , ctx.case_sensitive
00474                 , match_whole_word
00475                 , forward
00476                 , true
00477                 , ctx.char_cursor
00478                 , start_result_idx))
00479             {
00480                 // update the matching status
00481                 int next_word_index = dst_word_index + 1;
00482 
00483                 switch (stat)
00484                 {
00485                 case STATUS_HEADER:                   
00486                     if (next_word_index < 
00487                         (static_cast<int>(ctx.dst_words.size()) - 1))
00488                     {
00489                         // next word is the body one
00490                         stat = STATUS_BODY;
00491                     }
00492                     else if (next_word_index == 
00493                         (static_cast<int>(ctx.dst_words.size()) - 1))
00494                     {
00495                         // next word is the last one
00496                         stat = STATUS_TAIL;
00497                     }
00498 
00499                     // push the match word into the results list
00500                     words_queue.add(SearchWordRecord(cur_word
00501                         , start_result_idx
00502                         , word->getLength() - 1));
00503 
00504                     // set the index of first match word
00505                     first_matched_word = cur_word;
00506 
00507                     break;
00508                 case STATUS_BODY:
00509                     if (next_word_index ==
00510                         (static_cast<int>(ctx.dst_words.size()) - 1))
00511                     {
00512                         // next word is the last one
00513                         stat = STATUS_TAIL;
00514                     }
00515 
00516                     // push the match word into the results list
00517                     words_queue.add(SearchWordRecord(cur_word
00518                         , start_result_idx
00519                         , word->getLength() - 1));
00520 
00521                     break;
00522                 case STATUS_TAIL:
00523                     //Search succeed
00524                     // push the match word into the results list
00525                     words_queue.add(SearchWordRecord(cur_word
00526                         , start_result_idx
00527                         , start_result_idx
00528                         + static_cast<int>(ctx.dst_words[dst_word_index].size()) - 1));
00529 
00530                     break;
00531                 default:
00532                     break;
00533                 }
00534 
00535                 // move index to the next word
00536                 dst_word_index = next_word_index;
00537             }
00538             else
00539             {
00540                 //match fails, clear the previous status and the results queue
00541                 if (stat != STATUS_HEADER)
00542                 {
00543                     dst_word_index = 0;
00544                     stat = STATUS_HEADER;
00545                     words_queue.clear();
00546 
00547                     // roll back to the first matched word
00548                     cur_word = first_matched_word;
00549                 }
00550             }
00551 
00552             // next text word
00553             cur_word++;
00554             ctx.char_cursor = 0;
00555         }
00556     }
00557     else
00558     {
00559         // if only search a single word, we should take the situation
00560         // that pattern string appears repeatly in the source string
00561         while (cur_word < len)
00562         {
00563 
00564             word = words->get(cur_word);
00565 
00566             if (word == 0)
00567             {
00568                 ERRORPRINTF("Null word in search");
00569                 break;
00570             }
00571 
00572             //string word_str(word->getText()->getCString());
00573             string word_str;
00574             get_std_string_from_text_word(word, word_str);
00575 
00576             if (word_str.empty())
00577             {
00578                 ERRORPRINTF("Empty word in search");
00579                 break;
00580             }
00581 
00582             int start_result_idx = 0;
00583             if (compare_string(ctx.dst_words[0]
00584                 , word_str
00585                 , ctx.case_sensitive
00586                 , ctx.match_whole_word
00587                 , true
00588                 , false
00589                 , ctx.char_cursor
00590                 , start_result_idx))
00591             {
00592                 // this word meets the condition
00593                 // push it into the results list
00594                 words_queue.add(SearchWordRecord(cur_word
00595                     , start_result_idx
00596                     , start_result_idx + static_cast<int>(ctx.dst_words[0].size()) - 1));
00597                 break;
00598             }
00599 
00600             // next text word
00601             cur_word++;
00602             ctx.char_cursor = 0;
00603         }
00604 
00605     }
00606 
00607     // generate the search result
00608     if (words_queue.get_count() == static_cast<int>(ctx.dst_words.size()))
00609     {
00610         generate_search_result(ctx, words_queue, result, true);
00611     }
00612 
00613     return result;
00614 }
00615 
00616 PluginRangeImpl* PDFPage::search_string_backward(SearchContext &ctx
00617     , TextWordList *words)
00618 {
00619     int len = words->getLength();
00620     if (len <= 0)
00621     {
00622         return 0;
00623     }
00624 
00625     // update the current index
00626     PluginRangeImpl *result = 0;
00627 
00628     int cur_word = ctx.word_cursor;
00629     if (cur_word >= len)
00630     {
00631         cur_word = len - 1;
00632     }
00633 
00634     TextWord *word = 0;
00635     if (ctx.char_cursor < 0)
00636     {
00637         // move to the previous word
00638         cur_word--;
00639         if (cur_word >= 0)
00640         {
00641             word = words->get(cur_word);
00642             // reset the char cursor
00643             ctx.char_cursor = word->getLength() - 1;
00644         }
00645     }
00646 
00647     SearchWords words_queue;
00648 
00649     if (ctx.dst_words.size() > 1)
00650     {
00651         // status of the metching procedure
00652         // the initial status is "tail"
00653         MatchStatus stat = STATUS_TAIL;
00654 
00655         // index of the word in destination string
00656         int dst_words_end  = static_cast<int>(ctx.dst_words.size()) - 1;
00657         int dst_word_index = dst_words_end;
00658 
00659         // set the cursor to record the first matched result
00660         // if the suceed words are not matched, roll back to this word
00661         int first_matched_word = cur_word;
00662 
00663         while (cur_word >= 0)
00664         {
00665             // match success, break out
00666             if (dst_word_index < 0)
00667             {
00668                 break;
00669             }
00670 
00671             word = words->get(cur_word);
00672 
00673             if (word == 0)
00674             {
00675                 ERRORPRINTF("Null word in search");
00676                 break;
00677             }
00678 
00679             //string word_str(word->getText()->getCString());
00680             string word_str;
00681             get_std_string_from_text_word(word, word_str);
00682 
00683             if (word_str.empty())
00684             {
00685                 ERRORPRINTF("Empty word in search");
00686                 break;
00687             }
00688 
00689             int start_result_idx = 0;
00690             bool forward = false;
00691             bool match_whole_word = ctx.match_whole_word;
00692             if (!match_whole_word)
00693             {
00694                 if (stat == STATUS_BODY)
00695                 {
00696                     // if it is "body" word in the words list
00697                     // we must compare by whole word
00698                     match_whole_word = true;
00699                 }
00700                 else if (stat == STATUS_TAIL)
00701                 {
00702                     // if it is "header" word in the words list
00703                     // we must compare from the end char
00704                     forward = true;
00705                     ctx.char_cursor = 0;
00706                 }
00707             }
00708 
00709             if (compare_string(ctx.dst_words[dst_word_index]
00710                 , word_str
00711                 , ctx.case_sensitive
00712                 , match_whole_word
00713                 , forward
00714                 , true
00715                 , ctx.char_cursor
00716                 , start_result_idx))
00717             {
00718                 // update the matching status
00719                 int next_word_index = dst_word_index - 1;
00720                 switch (stat)
00721                 {
00722                 case STATUS_TAIL:
00723                     if (next_word_index > 0)
00724                     {
00725                         // previous word is the body one
00726                         stat = STATUS_BODY;
00727                     }
00728                     else if (next_word_index == 0)
00729                     {
00730                         // previous word is the header one
00731                         stat = STATUS_HEADER;
00732                     }
00733 
00734                     // push the match word into the results list
00735                     words_queue.add(SearchWordRecord(cur_word
00736                         , start_result_idx
00737                         , start_result_idx
00738                         + static_cast<int>(ctx.dst_words[dst_word_index].size()) - 1));
00739 
00740                     // set the first matched word
00741                     first_matched_word = cur_word;
00742 
00743                     break;
00744                 case STATUS_BODY:
00745                     if (next_word_index == 0)
00746                     {
00747                         // previous word is the header one
00748                         stat = STATUS_HEADER;
00749                     }
00750 
00751                     // push the match word into the results list
00752                     words_queue.add(SearchWordRecord(cur_word
00753                         , start_result_idx
00754                         , word->getLength() - 1));
00755 
00756                     break;
00757                 case STATUS_HEADER:
00758                     // search succeed
00759                     // push the match word into the results list
00760                     words_queue.add(SearchWordRecord(cur_word
00761                         , start_result_idx
00762                         , word->getLength() - 1));
00763 
00764                     break;
00765                 default:
00766                     break;
00767                 }
00768                 // move index to the previous word
00769                 dst_word_index = next_word_index;
00770             }
00771             else
00772             {
00773                 //match fails, clear the previous status and the results queue
00774                 if (stat != STATUS_TAIL)
00775                 {
00776                     dst_word_index = dst_words_end;
00777                     stat = STATUS_TAIL;
00778                     words_queue.clear();
00779 
00780                     // roll back to the first matched word
00781                     cur_word = first_matched_word;
00782                 }
00783             }
00784 
00785             // next text word
00786             cur_word--;
00787             if (cur_word >= 0)
00788             {
00789                 ctx.char_cursor = words->get(cur_word)->getLength() - 1;
00790             }
00791         }
00792     }
00793     else
00794     {
00795         while (cur_word >= 0)
00796         {
00797             // if only search a single word, we can simply compare it
00798             word = words->get(cur_word);
00799 
00800             if (word == 0)
00801             {
00802                 ERRORPRINTF("Null word in search");
00803                 break;
00804             }
00805 
00806             //string word_str(word->getText()->getCString());
00807             string word_str;
00808             get_std_string_from_text_word(word, word_str);
00809 
00810             if (word_str.empty())
00811             {
00812                 ERRORPRINTF("Empty word in search");
00813                 break;
00814             }
00815 
00816             int start_result_idx = 0;
00817             if (compare_string(ctx.dst_words[0]
00818                 , word_str
00819                 , ctx.case_sensitive
00820                 , ctx.match_whole_word
00821                 , false
00822                 , false
00823                 , ctx.char_cursor
00824                 , start_result_idx))
00825             {
00826                 // this word meets the condition
00827                 // push it into the results list
00828                 words_queue.add(SearchWordRecord(cur_word
00829                     , start_result_idx
00830                     , start_result_idx + static_cast<int>(ctx.dst_words[0].size()) - 1));
00831                 break;
00832             }
00833             // next text word
00834             cur_word--;
00835             if (cur_word >= 0)
00836             {
00837                 ctx.char_cursor = words->get(cur_word)->getLength() - 1;
00838             }
00839         }
00840     }
00841 
00842     // generate the search result
00843     if (words_queue.get_count() == static_cast<int>(ctx.dst_words.size()))
00844     {
00845         generate_search_result(ctx, words_queue, result, false);
00846     }
00847 
00848     return result;
00849 }
00850 
00851 void PDFPage::generate_search_result(SearchContext &ctx
00852     , SearchWords &queue
00853     , PluginRangeImpl* &result
00854     , bool forward)
00855 {
00856     if (queue.get_count() == 0)
00857     {
00858         return;
00859     }
00860 
00861     result = new PluginRangeImpl;
00862     SearchWordRecord begin;
00863     SearchWordRecord end;
00864     if (forward)
00865     {
00866         begin = queue.front();
00867         end   = queue.back();
00868     }
00869     else
00870     {
00871         begin = queue.back();
00872         end   = queue.front();
00873     }
00874 
00875     int idx_start  = begin.start_char_index;
00876     int idx_end    = end.end_char_index;
00877     int word_start = begin.word_index;
00878     int word_end   = end.word_index;
00879 
00880     if (forward)
00881     {
00882         // set the current search position to the last word
00883         ctx.word_cursor = word_end;
00884         ctx.char_cursor = idx_end;
00885     }
00886     else
00887     {
00888         // set the current search position to the first word
00889         ctx.word_cursor = word_start;
00890         ctx.char_cursor = idx_start;
00891     }
00892 
00893     PDFAnchor param;
00894     param.page_num = page_number;
00895     param.word_num = word_start;
00896     param.char_idx = idx_start;
00897     //param.file_name = get_doc_controller()->name();
00898     result->start_anchor = new StringImpl(param.get_string());
00899 
00900     param.char_idx = idx_end;
00901     param.word_num = word_end;
00902     result->end_anchor = new StringImpl(param.get_string());
00903 }
00904 
00905 bool PDFPage::compare_string(const string &dst
00906     , const string &src
00907     , bool case_sensitive
00908     , bool match_whole_word
00909     , bool forward
00910     , bool sub_string
00911     , const int start_char_idx
00912     , int &start_result_idx)
00913 {
00914     string dst_str = dst;
00915     string src_str = src;
00916 
00917     if (!case_sensitive)
00918     {
00919         //Transform all of the charactor into upper case
00920         std::transform(dst_str.begin(), dst_str.end(), dst_str.begin(), (int(*)(int))tolower);
00921 
00922         std::transform(src_str.begin(), src_str.end(), src_str.begin(), (int(*)(int))tolower);
00923     }
00924 
00925     if (match_whole_word)
00926     {
00927         // return false if size dismatch
00928         if (dst_str.size() != src_str.size())
00929         {
00930             return false;
00931         }
00932 
00933         return dst_str == src_str;
00934     }
00935 
00936     if (forward)
00937     {
00938         // test the length
00939         if (src.size() - start_char_idx < dst_str.size())
00940         {
00941             return false;
00942         }
00943 
00944         if (sub_string)
00945         {
00946             // retrieve the a sub string from the start character index
00947             // then compare with the destination string
00948             src_str = src_str.substr(start_char_idx, dst_str.size());
00949 
00950             if (src_str == dst_str)
00951             {
00952                 // if succeed, return the start index
00953                 start_result_idx = start_char_idx;
00954             }
00955             else
00956             {
00957                 // otherwise return error position
00958                 start_result_idx = static_cast<int>(src_str.npos);
00959             }
00960         }
00961         else
00962         {
00963             start_result_idx = static_cast<int>(src_str.find(dst_str
00964                 , static_cast<size_t>(start_char_idx)));
00965         }
00966     }
00967     else
00968     {
00969         // test the length
00970         if (start_char_idx - static_cast<int>(dst_str.size()) + 1 < 0)
00971         {
00972             return false;
00973         }
00974 
00975         if (sub_string)
00976         {
00977             // set the start position of retrieving
00978             int pos = start_char_idx - static_cast<int>(dst_str.size()) + 1;
00979 
00980             if (start_char_idx >= static_cast<int>(src_str.size()))
00981             {
00982                 return false;
00983             }
00984 
00985             // retrieve the sub string and stop at the char (start + 1)
00986             src_str = src_str.substr(pos, start_char_idx + 1);
00987 
00988             if (src_str == dst_str)
00989             {
00990                 // if succeed, return the start position
00991                 start_result_idx = pos;
00992             }
00993             else
00994             {
00995                 // otherwise, return the error position
00996                 start_result_idx = static_cast<int>(src_str.npos);
00997             }
00998         }
00999         else
01000         {
01001             start_result_idx = static_cast<int>(src_str.rfind(dst_str
01002                 , static_cast<size_t>(start_char_idx)));
01003         }
01004     }
01005 
01006     if (static_cast<size_t>(start_result_idx) != src_str.npos)
01007     {
01008         return  true;
01009     }
01010 
01011     return false;
01012 }
01013 
01014 bool merge_rectangle(const double x_min, const double y_min, 
01015     const double x_max, const double y_max, 
01016     PDFRectangle *rect)
01017 {
01018     if (!rect->isValid())
01019     {
01020         // initialize the rectangle
01021         rect->x1 = x_min;
01022         rect->x2 = x_max;
01023         rect->y1 = y_min;
01024         rect->y2 = y_max;
01025         return true;
01026     }
01027 
01028     if (fabs(y_min - rect->y1) < ZERO_RANGE
01029         && fabs(y_max - rect->y2) < ZERO_RANGE)
01030     {
01031         rect->x1 = min(x_min, rect->x1);
01032         rect->x2 = max(x_max, rect->x2);
01033         return true;
01034     }
01035 
01036     return false;
01037 }
01038 
01039 bool PDFPage::get_bounding_rectangles(const string &start_anchor
01040     , const string &end_anchor
01041     , PDFRectangles &rects)
01042 {
01043     PDFAnchor start_param(start_anchor);
01044     PDFAnchor end_param(end_anchor);
01045 
01046     //Cannot identify the word in seperated pages
01047     if (start_param.page_num != page_number ||
01048         start_param.page_num != end_param.page_num)
01049     {
01050         return false;
01051     }
01052 
01053     //int len = end_param.char_idx - start_param.char_idx + 1;
01054     //assert(len > 0);
01055     //assert(start_param.word_num >= 0 && end_param.word_num >= 0);
01056 
01057     // TODO. Get rectange from hyperlink (without word)
01058     PDFRectangle pdf_rect;
01059     if (start_param.word_num >= 0 && end_param.word_num >= 0)
01060     {
01061         TextWordList * words = get_words_list();
01062         for(int i = start_param.word_num; i <= end_param.word_num; ++i)
01063         {
01064             double x_min = 0.0, y_min = 0.0, x_max = 0.0, y_max = 0.0;
01065             words->get(i)->getBBox(&x_min, &y_min, &x_max, &y_max);
01066 
01067             if (!merge_rectangle(x_min, y_min, x_max, y_max, &pdf_rect))
01068             {
01069                 PluginRectangle rect;
01070                 rect.x = static_cast<int>(pdf_rect.x1);
01071                 rect.y = static_cast<int>(pdf_rect.y1);
01072                 rect.width  = static_cast<int>(pdf_rect.x2 - pdf_rect.x1) + 1;
01073                 rect.height = static_cast<int>(pdf_rect.y2 - pdf_rect.y1) + 1;
01074                 rects.add(rect);
01075 
01076                 // update the rectangle
01077                 pdf_rect.x1 = x_min;
01078                 pdf_rect.x2 = x_max;
01079                 pdf_rect.y1 = y_min;
01080                 pdf_rect.y2 = y_max;
01081             }
01082         }
01083 
01084         delete words;
01085 
01086         if (pdf_rect.isValid())
01087         {
01088             // add the last rectangle into the list
01089             PluginRectangle rect;
01090             rect.x = static_cast<int>(pdf_rect.x1);
01091             rect.y = static_cast<int>(pdf_rect.y1);
01092             rect.width  = static_cast<int>(pdf_rect.x2 - pdf_rect.x1) + 1;
01093             rect.height = static_cast<int>(pdf_rect.y2 - pdf_rect.y1) + 1;
01094             rects.add(rect);
01095         }
01096     }
01097     else if (start_param.link_idx >= 0 && end_param.link_idx >= 0)
01098     {
01099         if (links == 0)
01100         {
01101             return false;
01102         }
01103 
01104         for(int i = start_param.link_idx; i <= end_param.link_idx; ++i)
01105         {
01106             double x_min = 0.0, y_min = 0.0, x_max = 0.0, y_max = 0.0;
01107             Link *link = links->getLink(i);
01108             link->getRect(&x_min, &y_min, &x_max, &y_max);
01109             int real_x_min, real_y_min, real_x_max, real_y_max;
01110             coordinates_user_to_dev(x_min, y_min, &real_x_min, &real_y_min);
01111             coordinates_user_to_dev(x_max, y_max, &real_x_max, &real_y_max);
01112             x_min = min(real_x_min, real_x_max);
01113             x_max = max(real_x_min, real_x_max);
01114             y_min = min(real_y_min, real_y_max);
01115             y_max = max(real_y_min, real_y_max);
01116 
01117             if (!merge_rectangle(x_min, y_min, x_max, y_max, &pdf_rect))
01118             {
01119                 PluginRectangle rect;
01120                 rect.x = static_cast<int>(pdf_rect.x1);
01121                 rect.y = static_cast<int>(pdf_rect.y1);
01122                 rect.width  = static_cast<int>(pdf_rect.x2 - pdf_rect.x1) + 1;
01123                 rect.height = static_cast<int>(pdf_rect.y2 - pdf_rect.y1) + 1;
01124                 rects.add(rect);
01125 
01126                 // update the rectangle
01127                 pdf_rect.x1 = x_min;
01128                 pdf_rect.x2 = x_max;
01129                 pdf_rect.y1 = y_min;
01130                 pdf_rect.y2 = y_max;
01131             }
01132 
01133         }
01134 
01135         if (pdf_rect.isValid())
01136         {
01137             // add the last rectangle into the list
01138             PluginRectangle rect;
01139             rect.x = static_cast<int>(pdf_rect.x1);
01140             rect.y = static_cast<int>(pdf_rect.y1);
01141             rect.width  = static_cast<int>(pdf_rect.x2 - pdf_rect.x1) + 1;
01142             rect.height = static_cast<int>(pdf_rect.y2 - pdf_rect.y1) + 1;
01143             rects.add(rect);
01144         }
01145     }
01146 
01147     return true;
01148 }
01149 
01150 bool PDFPage::render_splash_map(PDFRenderer *renderer, void *abort_data)
01151 {
01152     if (locked())
01153     {
01154         if (get_render_status() == RENDER_STOP)
01155         {
01156             // the previous rendering stopped by some reason
01157             // reset the lock
01158             unlock();
01159         }
01160         else
01161         {
01162             // cannot render when the page is locked
01163             LOGPRINTF("Locked, Cannot render\n");
01164             return false;
01165         }
01166     }
01167 
01168     // destroy the pre-rendered results
01169     destroy_links();
01170     doc_controller->update_memory_usage((-1) * destroy_bitmap());
01171 
01172     // set the status to rendering
01173     set_render_status(RENDER_RUNNING);
01174 
01175     // second render the page
01176     RenderRet ret = Render_Error;
01177     SplashBitmap *b = 0;
01178     Links *l = 0;
01179 
01180     // lock when rendering
01181     ScopeMutex m(&(renderer->get_render_mutex()));
01182 
01183     ret = doc_controller->get_pdf_doc()->displayPage(
01184         renderer->get_splash_output_dev()
01185         , page_number
01186         , render_attr.get_real_zoom_value() * 0.01 * renderer->get_view_attr().get_device_dpi_h()
01187         , render_attr.get_real_zoom_value() * 0.01 * renderer->get_view_attr().get_device_dpi_v()
01188         , render_attr.get_rotate()
01189         , gFalse //useMediaBox, TODO.
01190         , gTrue  //crop, TODO.
01191         , gTrue  //doLinks, TODO.
01192         , abort_render_check
01193         , abort_data
01194     );
01195 
01196     if (ret == Render_Error || ret == Render_Invalid)
01197     {
01198         LOGPRINTF("1. Error in rendering page:%d\n", get_page_num());
01199         return false;
01200     }
01201 
01202     // take bitmap
01203     b = renderer->get_splash_output_dev()->takeBitmap();
01204 
01205     // take hyperlinks
01206 #ifdef WIN32
01207     l = doc_controller->get_pdf_doc()->takeLinks();
01208 #else
01209     l = doc_controller->get_pdf_doc()->getLinks(page_number);
01210 #endif
01211 
01212     if (ret == Render_Done)
01213     {
01214         update_bitmap(b);
01215         update_links(l);
01216 
01217         // retrieve ctm and ictm
01218         memcpy(ctm, renderer->get_splash_output_dev()->getDefCTM(), 6 * sizeof(double));
01219         memcpy(ictm, renderer->get_splash_output_dev()->getDefICTM(), 6 * sizeof(double)); 
01220 
01221         doc_controller->update_memory_usage(length());
01222         LOGPRINTF("Rendering of page:%d Done! Length:%d\n", get_page_num(), length());
01223         return true;
01224     }
01225     else if (ret == Render_Abort)
01226     {
01227         // MUST remove the temporary render results
01228         delete b;
01229         delete l;
01230         set_render_status(RENDER_STOP);
01231         LOGPRINTF("Rendering of page:%d is aborted! Task:%p\n", get_page_num(), abort_data);
01232         return false;
01233     }
01234 
01235     LOGPRINTF("2. Error in rendering page:%d\n", get_page_num());
01236     return false;
01237 }
01238 
01239 void PDFPage::set_render_status(RenderStatus s)
01240 {
01241     render_status = s;
01242 }
01243 
01244 bool PDFPage::render_text(PDFRenderer *renderer, bool use_defalt_setting)
01245 {
01246     static const double DEFAULT_ZOOM = 0.2f;
01247 
01248     destroy_text();
01249     // currently, the text rendering cannot be aborted
01250 
01251     // lock when rendering
01252     ScopeMutex m(&(renderer->get_render_mutex()));
01253 
01254     doc_controller->get_pdf_doc()->displayPage(
01255         renderer->get_text_output_dev()
01256         , page_number
01257         , (use_defalt_setting ? DEFAULT_ZOOM : render_attr.get_real_zoom_value() * 0.01) *
01258           renderer->get_view_attr().get_device_dpi_h()
01259         , (use_defalt_setting ? DEFAULT_ZOOM : render_attr.get_real_zoom_value() * 0.01) *
01260           renderer->get_view_attr().get_device_dpi_v()
01261         , render_attr.get_rotate()
01262         , gFalse
01263         , gTrue
01264         , gFalse
01265         );
01266 
01267     update_text(renderer->get_text_output_dev()->takeText());
01268 
01269     return true;
01270 }
01271 
01272 bool PDFPage::get_content_area(PDFRenderer *renderer, RenderArea &area)
01273 {
01274     static const double SHRINK_ZOOM = 0.2f;
01275     static const int    EXPAND_STEP = 2;
01276 
01277     if (!is_render_area_valid(content_area))
01278     {
01279         // lock when rendering
01280         ScopeMutex m(&(renderer->get_render_mutex()));
01281 
01282         RenderRet ret = doc_controller->get_pdf_doc()->displayPage(
01283         renderer->get_thumbnail_output_dev()
01284         , get_page_num()
01285         , SHRINK_ZOOM * renderer->get_view_attr().get_device_dpi_h()
01286         , SHRINK_ZOOM * renderer->get_view_attr().get_device_dpi_v()
01287         , 0
01288         , gFalse  //useMediaBox, TODO.
01289         , gFalse  //crop, TODO.
01290         , gFalse  //doLinks, TODO.
01291         );
01292 
01293         if (ret == Render_Error || ret == Render_Invalid)
01294         {
01295             ERRORPRINTF("Error in rendering thumbnail page:%d\n", get_page_num());
01296             return false;
01297         }
01298 
01299         SplashBitmap *thumb_map = renderer->get_thumbnail_output_dev()->takeBitmap();
01300         PDFRectangle content_rect;
01301         bool succeed = get_content_from_bitmap(thumb_map, content_rect);
01302         // calculate the render area by the rectangle
01303         double page_width = thumb_map->getWidth();
01304         double page_height = thumb_map->getHeight();
01305         delete thumb_map;
01306         if (!succeed)
01307         {
01308             // set the content area to be the page area
01309             content_rect.x1 = content_rect.y1 = 0;
01310             content_rect.x2 = page_width;
01311             content_rect.y2 = page_height;
01312         }
01313         else
01314         {
01315             // expand the content area to avoid content covering
01316             double inc_x2 = 0;
01317             double inc_y2 = 0;
01318 
01319             // expand x1
01320             if (content_rect.x1 > EXPAND_STEP)
01321             {
01322                 content_rect.x1 -= EXPAND_STEP;
01323                 inc_x2 = EXPAND_STEP;
01324             }
01325             else
01326             {
01327                 inc_x2 = content_rect.x1;
01328                 content_rect.x1 = 0;
01329             }
01330 
01331             // expand y1
01332             if (content_rect.y1 > EXPAND_STEP)
01333             {
01334                 content_rect.y1 -= EXPAND_STEP;
01335                 inc_y2 = EXPAND_STEP;
01336             }
01337             else
01338             {
01339                 inc_y2 = content_rect.y1;
01340                 content_rect.y1 = 0;
01341             }
01342 
01343             // expand x2
01344             content_rect.x2 += (inc_x2 + 1);
01345             if (content_rect.x2 > page_width)
01346             {
01347                 content_rect.x2 = page_width;
01348             }
01349 
01350             // expand y2
01351             content_rect.y2 += (inc_y2 + 1);
01352             if (content_rect.y2 > page_height)
01353             {
01354                 content_rect.y2 = page_height;
01355             }
01356         }
01357 
01358         content_area.x_offset = static_cast<float>(content_rect.x1 / page_width);
01359         content_area.y_offset = static_cast<float>(content_rect.y1 / page_height);
01360 
01361         content_area.width =
01362             static_cast<float>((content_rect.x2 - content_rect.x1) / page_width);
01363         content_area.height =
01364             static_cast<float>((content_rect.y2 - content_rect.y1) / page_height);
01365 
01366         if (content_area.width > 1.0f)
01367         {
01368             content_area.x_offset = 0.0f;
01369             content_area.width    = 1.0f;
01370         }
01371         if (content_area.height > 1.0f)
01372         {
01373             content_area.y_offset = 0.0f;
01374             content_area.height   = 1.0f;
01375         }
01376     }
01377 
01378     area = content_area;
01379 
01380     return true;
01381 }
01382 
01383 bool PDFPage::get_content_from_bitmap(SplashBitmap *bitmap, PDFRectangle &rect)
01384 {
01385     static const int BACKGROUND_COLOR = 255;
01386     static const int LINE_STEP        = 1;
01387     static const int SHRINK_STEP      = 1;
01388     static const double SHRINK_RANGE  = 0.3f;
01389 
01390     // top left
01391     int x1 = 0;
01392     int y1 = 0;
01393     // bottom right
01394     int x2 = bitmap->getWidth();
01395     int y2 = bitmap->getHeight();
01396 
01397     int left_edge = static_cast<int>(SHRINK_RANGE * x2);
01398     int right_edge = static_cast<int>((1.0f - SHRINK_RANGE) * x2);
01399     int top_edge = static_cast<int>(SHRINK_RANGE * y2);
01400     int bottom_edge = static_cast<int>((1.0f - SHRINK_RANGE) * y2);
01401 
01402     // current pixel
01403     Guchar cur_pix;
01404     bool stop[4] = {false, false, false, false};
01405 
01406     while (!stop[0] || !stop[1] || !stop[2] || !stop[3])
01407     {
01408         // check top line
01409         int x_cur = x1;
01410         while (x_cur < x2 && !stop[0])
01411         {
01412             bitmap->getPixel(x_cur, y1, &cur_pix);
01413             if (cur_pix != BACKGROUND_COLOR)
01414             {
01415                 stop[0] = true;
01416                 break;
01417             }
01418             x_cur += LINE_STEP;
01419         }
01420 
01421         // check bottom line
01422         x_cur = x1;
01423         while (x_cur < x2 && !stop[1])
01424         {
01425             bitmap->getPixel(x_cur, y2, &cur_pix);
01426             if (cur_pix != BACKGROUND_COLOR)
01427             {
01428                 stop[1] = true;
01429                 break;
01430             }
01431             x_cur += LINE_STEP;
01432         }
01433 
01434         // check left line
01435         int y_cur = y1;
01436         while (y_cur < y2 && !stop[2])
01437         {
01438             bitmap->getPixel(x1, y_cur, &cur_pix);
01439             if (cur_pix != BACKGROUND_COLOR)
01440             {
01441                 stop[2] = true;
01442                 break;
01443             }
01444             y_cur += LINE_STEP;
01445         }
01446 
01447         // check right line
01448         y_cur = y1;
01449         while (y_cur < y2 && !stop[3])
01450         {
01451             bitmap->getPixel(x2, y_cur, &cur_pix);
01452             if (cur_pix != BACKGROUND_COLOR)
01453             {
01454                 stop[3] = true;
01455                 break;
01456             }
01457             y_cur += LINE_STEP;
01458         }
01459 
01460         // shrink the rectangle
01461         if (!stop[2])
01462         {
01463             if (x1 >= left_edge)
01464             {
01465                 stop[2] = true;
01466             }
01467             else
01468             {
01469                 x1 += SHRINK_STEP;
01470             }
01471         }
01472 
01473         if (!stop[3])
01474         {
01475             if (x2 <= right_edge)
01476             {
01477                 stop[3] = true;
01478             }
01479             else
01480             {
01481                 x2 -= SHRINK_STEP;
01482             }
01483         }
01484 
01485         if (!stop[0])
01486         {
01487             if (y1 >= top_edge)
01488             {
01489                 stop[0] = true;
01490             }
01491             else
01492             {
01493                 y1 += SHRINK_STEP;
01494             }
01495         }
01496 
01497         if (!stop[1])
01498         {
01499             if (y2 <= bottom_edge)
01500             {
01501                 stop[1] = true;
01502             }
01503             else
01504             {
01505                 y2 -= SHRINK_STEP;
01506             }
01507         }
01508 
01509     }
01510 
01511     if (stop[0] && stop[1] && stop[2] && stop[3])
01512     {
01513         rect.x1 = x1;
01514         rect.x2 = x2;
01515         rect.y1 = y1;
01516         rect.y2 = y2;
01517         return true;
01518     }
01519 
01520     return false;
01521 }
01522 
01523 GBool PDFPage::abort_render_check(void *data)
01524 {
01525     Task *task = static_cast<Task*>(data);
01526 
01527     return static_cast<GBool>(task->is_aborted());
01528 }
01529 
01530 // (x,y) -> "pdf:/page:8/link:0/word:12/char:06"
01531 void PDFPage::get_anchor_param_from_coordinates(double x, double y
01532     , PDFAnchor &param)
01533 {
01534     int i;
01535     // Caculate whether (x, y) inside a Link and inside which Link
01536     int link_index = -1;
01537     if (links && links->onLink(x, y))
01538     {
01539         int link_num;
01540         Link * link;
01541 
01542         link_num = links->getNumLinks();
01543         for (i = 0; i < link_num; i++)
01544         {
01545             link = links->getLink(i);
01546             if (link && link->inRect(x, y))
01547             {
01548                 link_index = i;
01549                 break;
01550             }
01551         }
01552     }
01553 
01554     // get the anchor of a screen point
01555     // now the anchor is supposed to be like "pdf:/page:8/link:0/word:12/char:06"
01556     // however, if the point is located on a object(image, shape or any thing else),
01557     // word and char cannot be retrieved.
01558     // TODO. add support to the non-text object
01559     int dx, dy;
01560     coordinates_user_to_dev(x, y, &dx, &dy);
01561 
01562     int word_index = -1, char_index = -1;
01563 
01564     TextWordList * words = get_words_list();
01565     if (words != 0)
01566     {
01567         int words_num = words->getLength();
01568         TextWord * word = 0;
01569         double x_min = 0.0, y_min = 0.0, x_max = 0.0, y_max = 0.0;
01570         for(i = 0; i < words_num; i++)
01571         {
01572             word = words->get(i);
01573             word->getBBox(&x_min, &y_min, &x_max, &y_max);
01574 
01575             if ((x_min <= dx) && (dx <= x_max) 
01576                 && (y_min <= dy) && (dy <= y_max))
01577             {
01578                 word_index = i;
01579 
01580                 int chars_num = word->getLength();
01581                 for (int j = 0; j < chars_num; j++)
01582                 {
01583 #ifdef WIN32
01584                     x_min = word->getEdge(j);
01585                     x_max = word->getEdge(j+1);
01586 #else
01587                     word->getCharBBox(j, &x_min, &y_min, &x_max, &y_max); 
01588 #endif
01589                     if ((x_min <= dx) && (dx <= x_max) 
01590                         && (y_min <= dy) && (dy <= y_max))
01591                     {
01592                         char_index = j;
01593                         break;
01594                     }
01595                 }
01596                 break;
01597             }
01598         }
01599 
01600         delete words;
01601     }
01602 
01603     // set the anchor
01604     param.page_num = page_number;
01605     param.link_idx = link_index;
01606     param.word_num = word_index;
01607     param.char_idx = char_index;
01608     //param.file_name = get_doc_controller()->name();
01609 }
01610 
01611 bool PDFPage::get_range_param_by_word_index(const int word_index,
01612                                             PDFAnchor & start_param,
01613                                             PDFAnchor & end_param)
01614 {
01615     TextWordList * words = get_words_list();
01616     bool ret = false;
01617     if (words != 0)
01618     {
01619         int words_num = words->getLength();
01620         if (word_index >= 0 && word_index < words_num)
01621         {
01622             TextWord * word = words->get(word_index);
01623 
01624             start_param.page_num = page_number;
01625             start_param.word_num = word_index;
01626             start_param.char_idx = 0;
01627             //start_param.file_name = get_doc_controller()->name();
01628 
01629             end_param.page_num = page_number;
01630             end_param.word_num = word_index;
01631             end_param.char_idx = word->getLength();
01632             //end_param.file_name = get_doc_controller()->name();
01633 
01634             ret = true;
01635         }
01636         delete words;
01637     }
01638     return ret;
01639 }
01640 
01641 bool PDFPage::get_text_by_range(const PDFAnchor & start_param,
01642                                 const PDFAnchor & end_param,
01643                                 std::string &result)
01644 {
01645     if (!end_param.is_end_anchor() && start_param.page_num != end_param.page_num)
01646     {
01647         // start anchor and end anchor point to different pages, quit
01648         return false;
01649     }
01650 
01651     TextWordList * words = get_words_list();
01652     result.clear();
01653     if (words != 0)
01654     {
01655         int words_num = words->getLength();
01656         int start_index = start_param.word_num;
01657         int end_index = end_param.is_end_anchor() ? words_num : end_param.word_num;
01658 
01659         // Get each word in the range.
01660 
01661         TextWord * word;
01662         string text;
01663 
01664         for (int idx = start_index; idx < end_index; ++idx)
01665         {
01666             word = words->get(idx);
01667             if (word == 0)
01668             {
01669                 // the word is NULL
01670                 ERRORPRINTF("Null word in search");
01671                 break;
01672             }
01673 
01674             get_std_string_from_text_word(word, text);
01675             result += text;
01676             // Append the seperator " "
01677             result += " ";
01678         }
01679 
01680         // Append the last word.
01681         if (start_index <= end_index)
01682         {
01683             word = words->get(end_index);
01684             if (word)
01685             {
01686                 get_std_string_from_text_word(word, text);
01687                 result += text;
01688             }
01689         }
01690 
01691         delete words;
01692     }
01693 
01694     return true;
01695 }
01696 
01697 bool PDFPage::get_range_param_by_link_index(const int link_index,
01698     PDFAnchor & start_param,
01699     PDFAnchor & end_param)
01700 {
01701     if (links)
01702     {
01703         //assert(link_index >= 0 && link_index < links->getNumLinks());
01704         if (link_index < 0 || link_index >= links->getNumLinks())
01705         {
01706             ERRORPRINTF("Error Link Index");
01707             return false;
01708         }
01709 
01710         // Link * link = links->getLink(link_index);
01711 
01712         // Disable the area check
01713         // Do we really need the area check for hyperlinks?
01714         // In most of the PDF documents, hyperlinks are indicated by a rectangle.
01715         // We can just return the index of hyperlink
01716 
01717         /*double x_min = 0.0f, y_min = 0.0f, x_max = 0.0f, y_max = 0.0f, y = 0.0f;
01718         link->getRect(&x_min, &y_min, &x_max, &y_max);
01719 
01720         // calculate the middle point
01721         y = (y_min + y_max) / 2;
01722 
01723         get_anchor_param_from_coordinates(x_min, y, start_param);
01724         get_anchor_param_from_coordinates(x_max, y, end_param);*/
01725 
01726         start_param.page_num = page_number;
01727         start_param.link_idx = link_index;
01728         //start_param.file_name = get_doc_controller()->name();
01729 
01730         end_param.page_num = page_number;
01731         end_param.link_idx = link_index;
01732         //end_param.file_name = get_doc_controller()->name();
01733         return true;
01734     }
01735 
01736     return false;
01737 }
01738 
01739 int PDFPage::get_goto_page_of_link(int link_index)
01740 {
01741     if (links == 0)
01742     {
01743         return 0;
01744     }
01745 
01746     //assert(link_index >= 0 && link_index < links->getNumLinks());
01747     if (link_index < 0 || link_index >= links->getNumLinks())
01748     {
01749         ERRORPRINTF("Error Link Index");
01750         return 0;
01751     }
01752 
01753     Link *link = links->getLink(link_index);
01754     int page_num = 0;
01755     if (link != 0)
01756     {
01757         LinkAction *action = link->getAction();
01758         LinkActionKind kind = action->getKind();
01759         if (kind == actionGoTo)
01760         {
01761             // Caculate the page number of destination of this link.
01762 #ifdef WIN32
01763             UGooString *named_dest = 0;
01764 #else
01765             GooString *named_dest = 0;
01766 #endif
01767             PDFDoc *doc = doc_controller->get_pdf_doc();
01768 
01769             LinkDest *dest = ((LinkGoTo *)action)->getDest();
01770             named_dest = ((LinkGoTo *)action)->getNamedDest();
01771 
01772             if (dest && dest->isPageRef())
01773             {
01774                 Ref pageRef = dest->getPageRef();
01775                 page_num = doc->findPage(pageRef.num, pageRef.gen);
01776             }
01777             else if (named_dest)
01778             {
01779                 dest = doc->findDest(named_dest);
01780                 if (dest)
01781                 {
01782                     Ref pageRef = dest->getPageRef();
01783                     page_num = doc->findPage(pageRef.num, pageRef.gen);
01784                 }
01785             }
01786         }
01787     }
01788     return page_num;
01789 }
01790 
01791 bool PDFPage::is_hyper_linked_page(int dst_page_num)
01792 {
01793     if (links == 0)
01794     {
01795         return false;
01796     }
01797 
01798     int link_num = links->getNumLinks();
01799     if (link_num <= 0)
01800     {
01801         return false;
01802     }
01803 
01804     for (int i = 0; i < link_num; ++i)
01805     {
01806         if (dst_page_num == get_goto_page_of_link(i))
01807         {
01808             return true;
01809         }
01810     }
01811 
01812     return false;
01813 }
01814 
01815 // 4 -> "pdf:/page:8" 
01816 bool PDFPage::get_goto_anchor_of_link(int link_index, std::string & anchor)
01817 {
01818     // Get the anchor of destination for this link.
01819     int page_num = get_goto_page_of_link(link_index);
01820     if (page_num > 0)
01821     {
01822         PDFAnchor param;
01823         param.page_num = page_num;
01824         //param.file_name = get_doc_controller()->name();
01825         anchor = param.get_string();
01826         return true;
01827     }
01828 
01829     return false;
01830 }
01831 
01832 void PDFPage::coordinates_dev_to_user(const double dx, const double dy, 
01833     double * ux, double *uy)
01834 {
01835   *ux = ictm[0] * dx + ictm[2] * dy + ictm[4];
01836   *uy = ictm[1] * dx + ictm[3] * dy + ictm[5];
01837 }
01838 
01839 void PDFPage::coordinates_user_to_dev(const double ux, const double uy, 
01840     int * dx, int *dy)
01841 {
01842   *dx = (int)(ctm[0] * ux + ctm[2] * uy + ctm[4] + 0.5);
01843   *dy = (int)(ctm[1] * ux + ctm[3] * uy + ctm[5] + 0.5);
01844 }
01845 
01846 }//namespace pdf
01847 
01848 
Generated by  doxygen 1.6.2-20100208