00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "utils.h"
00028 #include "log.h"
00029 #include "task.h"
00030
00031 #include "pdf_page.h"
00032 #include "pdf_doc_controller.h"
00033 #include "pdf_renderer.h"
00034
00035 namespace pdf
00036 {
00037
00038 void init_render_area(RenderArea & area)
00039 {
00040 area.x_offset = 0.0f;
00041 area.y_offset = 0.0f;
00042 area.width = -1.0f;
00043 area.height = -1.0f;
00044 }
00045
00046 bool is_render_area_valid(const RenderArea & area)
00047 {
00048 return (area.width > 0.0f && area.height > 0.0f);
00049 }
00050
00051 void get_content_area_in_pixel(const RenderArea & area,
00052 const int origin_width,
00053 const int origin_height,
00054 PluginRectangle & rect)
00055 {
00056 rect.x = static_cast<int>(origin_width * area.x_offset);
00057 rect.y = static_cast<int>(origin_height * area.y_offset);
00058 rect.width = static_cast<int>(origin_width * area.width);
00059 rect.height = static_cast<int>(origin_height * area.height);
00060 }
00061
00062 void get_std_string_from_text_word(TextWord * word, std::string & result)
00063 {
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074 GooString * text = word->getText();
00075 if (text != 0)
00076 {
00077 result = std::string(text->getCString());
00078 delete text;
00079 }
00080 }
00081
00082 PDFPage::PDFPage(int page_num, const PDFRenderAttributes & attr)
00083 {
00084 init();
00085 page_number = page_num;
00086 render_attr = attr;
00087 }
00088
00089 PDFPage::~PDFPage(void)
00090 {
00091 destroy();
00092 }
00093
00094 void PDFPage::init()
00095 {
00096 page_number = 0;
00097 bitmap = 0;
00098 links = 0;
00099 text = 0;
00100 doc_controller = 0;
00101 b_lock = false;
00102 render_status = RENDER_STOP;
00103 ref_id = PRERENDER_REF_ID;
00104 init_render_area(content_area);
00105 }
00106
00107 size_t PDFPage::operator()()
00108 {
00109 return static_cast<size_t>(page_number);
00110 }
00111
00112 bool PDFPage::operator == (const PDFPage & right)
00113 {
00114 return ((this->page_number == right.page_number) &&
00115 this->render_attr == right.render_attr);
00116 }
00117
00118 bool PDFPage::operator == (const PDFRenderAttributes & right)
00119 {
00120 return (this->render_attr == right);
00121 }
00122
00123 void PDFPage::set_render_attr(const PDFRenderAttributes & attr)
00124 {
00125 if (render_attr == attr)
00126 {
00127 return;
00128 }
00129
00130 render_attr = attr;
00131 }
00132
00133 void PDFPage::destroy_text()
00134 {
00135 if (text)
00136 {
00137 delete text;
00138 text = 0;
00139 }
00140 }
00141
00142 unsigned int PDFPage::destroy_bitmap()
00143 {
00144 unsigned int size = 0;
00145 if (bitmap)
00146 {
00147 size = length();
00148 delete bitmap;
00149 bitmap = 0;
00150 }
00151 return size;
00152 }
00153
00154 void PDFPage::destroy_links()
00155 {
00156 if (links)
00157 {
00158 delete links;
00159 links = 0;
00160 }
00161 }
00162
00163 unsigned int PDFPage::destroy()
00164 {
00165 if (locked())
00166 {
00167 return 0;
00168 }
00169
00170 if (get_render_status() == RENDER_RUNNING)
00171 {
00172
00173
00174
00175 return 0;
00176 }
00177
00178
00179 set_render_status(RENDER_STOP);
00180
00181 destroy_links();
00182 destroy_text();
00183 unsigned int size = destroy_bitmap();
00184
00185 return size;
00186 }
00187
00188 TextWordList* PDFPage::get_words_list()
00189 {
00190 TextWordList *words = 0;
00191
00192 if (text)
00193 {
00194 words = text->makeWordList(gFalse);
00195 }
00196
00197 return words;
00198 }
00199
00200 int PDFPage::get_bitmap_width()
00201 {
00202 if (!bitmap)
00203 {
00204 return 0;
00205 }
00206
00207 return bitmap->getWidth();
00208 }
00209
00210 int PDFPage::get_bitmap_height()
00211 {
00212 if (!bitmap)
00213 {
00214 return 0;
00215 }
00216
00217 return bitmap->getHeight();
00218 }
00219
00220 unsigned int PDFPage::length()
00221 {
00222 if (!bitmap)
00223 {
00224 return 0;
00225 }
00226 return bitmap->getHeight() * bitmap->getRowSize();
00227 }
00228
00229 unsigned int PDFPage::try_calc_length(const double zoom_value
00230 , const double crop_width
00231 , const double crop_height)
00232 {
00233 int width = static_cast<int>(crop_width + 1.0f);
00234 int height = static_cast<int>(crop_height + 1.0f);
00235 double zoom = (zoom_value + 1.0f)/ 100.0f;
00236 int row_stride = ((width + 3)>> 2) << 2;
00237
00238 return static_cast<unsigned int>(row_stride * height * zoom * zoom);
00239 }
00240
00241 int PDFPage::get_bitmap_row_stride()
00242 {
00243 if (!bitmap)
00244 {
00245 return 0;
00246 }
00247
00248 return bitmap->getRowSize();
00249 }
00250
00251 const unsigned char* PDFPage::get_bitmap_data()
00252 {
00253 if (!bitmap)
00254 {
00255 return 0;
00256 }
00257
00258 return bitmap->getDataPtr();
00259 }
00260
00261 void PDFPage::update_bitmap(SplashBitmap *m)
00262 {
00263 if (bitmap == m)
00264 {
00265 return;
00266 }
00267
00268 bitmap = m;
00269 }
00270
00271 void PDFPage::update_links(Links *l)
00272 {
00273 if (links == l)
00274 {
00275 return;
00276 }
00277
00278 links = l;
00279 }
00280
00281 void PDFPage::update_text(TextPage *t)
00282 {
00283 if (text == t)
00284 {
00285 return;
00286 }
00287
00288 text = t;
00289 }
00290
00291 SearchResult PDFPage::search(SearchContext &ctx
00292 , PDFSearchPage &results)
00293 {
00294 if (!text)
00295 {
00296 return RES_ERROR;
00297 }
00298
00299 TextWordList *words = get_words_list();
00300 PluginRangeImpl *result = 0;
00301 SearchResult ret = RES_NOT_FOUND;
00302
00303 int count = 0;
00304
00305
00306
00307 if (ctx.forward)
00308 {
00309 if (ctx.word_cursor < 0)
00310 {
00311
00312 ctx.word_cursor = 0;
00313 }
00314
00315
00316 result = search_string_forward(ctx, words);
00317 if (ctx.search_all)
00318 {
00319 while(result != 0)
00320 {
00321 results.add(result);
00322 count++;
00323 result = search_string_forward(ctx, words);
00324 }
00325 }
00326 else
00327 {
00328 if (result)
00329 {
00330 results.add(result);
00331 count++;
00332 }
00333 }
00334
00335 }
00336 else
00337 {
00338 if (ctx.word_cursor < 0)
00339 {
00340
00341 ctx.word_cursor = words->getLength() - 1;
00342 }
00343
00344
00345 result = search_string_backward(ctx, words);
00346 if (ctx.search_all)
00347 {
00348 while(result != 0)
00349 {
00350 results.add(result);
00351 count++;
00352 result = search_string_backward(ctx, words);
00353 }
00354 }
00355 else
00356 {
00357 if (result)
00358 {
00359 results.add(result);
00360 count++;
00361 }
00362 }
00363 }
00364
00365 if (count > 0)
00366 {
00367 ret = RES_OK;
00368 }
00369
00370 delete words;
00371 return ret;
00372 }
00373
00374
00375
00376 PluginRangeImpl* PDFPage::search_string_forward(SearchContext &ctx
00377 , TextWordList *words)
00378 {
00379 if (ctx.dst_words.empty())
00380 {
00381 return 0;
00382 }
00383
00384
00385 PluginRangeImpl *result = 0;
00386
00387 int cur_word = ctx.word_cursor;
00388 int len = words->getLength();
00389 if (cur_word >= len)
00390 {
00391
00392 return 0;
00393 }
00394
00395 TextWord *word = words->get(cur_word);
00396 if (ctx.char_cursor >= word->getLength())
00397 {
00398
00399 cur_word++;
00400
00401 ctx.char_cursor = 0;
00402 }
00403
00404 SearchWords words_queue;
00405
00406 if (ctx.dst_words.size() > 1)
00407 {
00408
00409
00410 MatchStatus stat = STATUS_HEADER;
00411
00412
00413 int dst_word_index = 0;
00414
00415
00416
00417 int first_matched_word = cur_word;
00418
00419 while (cur_word < len)
00420 {
00421
00422 if (dst_word_index >= static_cast<int>(ctx.dst_words.size()))
00423 {
00424 break;
00425 }
00426
00427 word = words->get(cur_word);
00428
00429 if (word == 0)
00430 {
00431
00432 ERRORPRINTF("Null word in search");
00433 break;
00434 }
00435
00436
00437 string word_str;
00438 get_std_string_from_text_word(word, word_str);
00439
00440 if (word_str.empty())
00441 {
00442
00443 ERRORPRINTF("Empty word in search");
00444 break;
00445 }
00446
00447
00448 int start_result_idx = 0;
00449
00450
00451 bool forward = true;
00452 bool match_whole_word = ctx.match_whole_word;
00453 if (!match_whole_word)
00454 {
00455 if (stat == STATUS_BODY)
00456 {
00457
00458
00459 match_whole_word = true;
00460 }
00461 else if (stat == STATUS_HEADER)
00462 {
00463
00464
00465 forward = false;
00466
00467 ctx.char_cursor = static_cast<int>(word_str.size()) - 1;
00468 }
00469 }
00470
00471 if (compare_string(ctx.dst_words[dst_word_index]
00472 , word_str
00473 , ctx.case_sensitive
00474 , match_whole_word
00475 , forward
00476 , true
00477 , ctx.char_cursor
00478 , start_result_idx))
00479 {
00480
00481 int next_word_index = dst_word_index + 1;
00482
00483 switch (stat)
00484 {
00485 case STATUS_HEADER:
00486 if (next_word_index <
00487 (static_cast<int>(ctx.dst_words.size()) - 1))
00488 {
00489
00490 stat = STATUS_BODY;
00491 }
00492 else if (next_word_index ==
00493 (static_cast<int>(ctx.dst_words.size()) - 1))
00494 {
00495
00496 stat = STATUS_TAIL;
00497 }
00498
00499
00500 words_queue.add(SearchWordRecord(cur_word
00501 , start_result_idx
00502 , word->getLength() - 1));
00503
00504
00505 first_matched_word = cur_word;
00506
00507 break;
00508 case STATUS_BODY:
00509 if (next_word_index ==
00510 (static_cast<int>(ctx.dst_words.size()) - 1))
00511 {
00512
00513 stat = STATUS_TAIL;
00514 }
00515
00516
00517 words_queue.add(SearchWordRecord(cur_word
00518 , start_result_idx
00519 , word->getLength() - 1));
00520
00521 break;
00522 case STATUS_TAIL:
00523
00524
00525 words_queue.add(SearchWordRecord(cur_word
00526 , start_result_idx
00527 , start_result_idx
00528 + static_cast<int>(ctx.dst_words[dst_word_index].size()) - 1));
00529
00530 break;
00531 default:
00532 break;
00533 }
00534
00535
00536 dst_word_index = next_word_index;
00537 }
00538 else
00539 {
00540
00541 if (stat != STATUS_HEADER)
00542 {
00543 dst_word_index = 0;
00544 stat = STATUS_HEADER;
00545 words_queue.clear();
00546
00547
00548 cur_word = first_matched_word;
00549 }
00550 }
00551
00552
00553 cur_word++;
00554 ctx.char_cursor = 0;
00555 }
00556 }
00557 else
00558 {
00559
00560
00561 while (cur_word < len)
00562 {
00563
00564 word = words->get(cur_word);
00565
00566 if (word == 0)
00567 {
00568 ERRORPRINTF("Null word in search");
00569 break;
00570 }
00571
00572
00573 string word_str;
00574 get_std_string_from_text_word(word, word_str);
00575
00576 if (word_str.empty())
00577 {
00578 ERRORPRINTF("Empty word in search");
00579 break;
00580 }
00581
00582 int start_result_idx = 0;
00583 if (compare_string(ctx.dst_words[0]
00584 , word_str
00585 , ctx.case_sensitive
00586 , ctx.match_whole_word
00587 , true
00588 , false
00589 , ctx.char_cursor
00590 , start_result_idx))
00591 {
00592
00593
00594 words_queue.add(SearchWordRecord(cur_word
00595 , start_result_idx
00596 , start_result_idx + static_cast<int>(ctx.dst_words[0].size()) - 1));
00597 break;
00598 }
00599
00600
00601 cur_word++;
00602 ctx.char_cursor = 0;
00603 }
00604
00605 }
00606
00607
00608 if (words_queue.get_count() == static_cast<int>(ctx.dst_words.size()))
00609 {
00610 generate_search_result(ctx, words_queue, result, true);
00611 }
00612
00613 return result;
00614 }
00615
00616 PluginRangeImpl* PDFPage::search_string_backward(SearchContext &ctx
00617 , TextWordList *words)
00618 {
00619 int len = words->getLength();
00620 if (len <= 0)
00621 {
00622 return 0;
00623 }
00624
00625
00626 PluginRangeImpl *result = 0;
00627
00628 int cur_word = ctx.word_cursor;
00629 if (cur_word >= len)
00630 {
00631 cur_word = len - 1;
00632 }
00633
00634 TextWord *word = 0;
00635 if (ctx.char_cursor < 0)
00636 {
00637
00638 cur_word--;
00639 if (cur_word >= 0)
00640 {
00641 word = words->get(cur_word);
00642
00643 ctx.char_cursor = word->getLength() - 1;
00644 }
00645 }
00646
00647 SearchWords words_queue;
00648
00649 if (ctx.dst_words.size() > 1)
00650 {
00651
00652
00653 MatchStatus stat = STATUS_TAIL;
00654
00655
00656 int dst_words_end = static_cast<int>(ctx.dst_words.size()) - 1;
00657 int dst_word_index = dst_words_end;
00658
00659
00660
00661 int first_matched_word = cur_word;
00662
00663 while (cur_word >= 0)
00664 {
00665
00666 if (dst_word_index < 0)
00667 {
00668 break;
00669 }
00670
00671 word = words->get(cur_word);
00672
00673 if (word == 0)
00674 {
00675 ERRORPRINTF("Null word in search");
00676 break;
00677 }
00678
00679
00680 string word_str;
00681 get_std_string_from_text_word(word, word_str);
00682
00683 if (word_str.empty())
00684 {
00685 ERRORPRINTF("Empty word in search");
00686 break;
00687 }
00688
00689 int start_result_idx = 0;
00690 bool forward = false;
00691 bool match_whole_word = ctx.match_whole_word;
00692 if (!match_whole_word)
00693 {
00694 if (stat == STATUS_BODY)
00695 {
00696
00697
00698 match_whole_word = true;
00699 }
00700 else if (stat == STATUS_TAIL)
00701 {
00702
00703
00704 forward = true;
00705 ctx.char_cursor = 0;
00706 }
00707 }
00708
00709 if (compare_string(ctx.dst_words[dst_word_index]
00710 , word_str
00711 , ctx.case_sensitive
00712 , match_whole_word
00713 , forward
00714 , true
00715 , ctx.char_cursor
00716 , start_result_idx))
00717 {
00718
00719 int next_word_index = dst_word_index - 1;
00720 switch (stat)
00721 {
00722 case STATUS_TAIL:
00723 if (next_word_index > 0)
00724 {
00725
00726 stat = STATUS_BODY;
00727 }
00728 else if (next_word_index == 0)
00729 {
00730
00731 stat = STATUS_HEADER;
00732 }
00733
00734
00735 words_queue.add(SearchWordRecord(cur_word
00736 , start_result_idx
00737 , start_result_idx
00738 + static_cast<int>(ctx.dst_words[dst_word_index].size()) - 1));
00739
00740
00741 first_matched_word = cur_word;
00742
00743 break;
00744 case STATUS_BODY:
00745 if (next_word_index == 0)
00746 {
00747
00748 stat = STATUS_HEADER;
00749 }
00750
00751
00752 words_queue.add(SearchWordRecord(cur_word
00753 , start_result_idx
00754 , word->getLength() - 1));
00755
00756 break;
00757 case STATUS_HEADER:
00758
00759
00760 words_queue.add(SearchWordRecord(cur_word
00761 , start_result_idx
00762 , word->getLength() - 1));
00763
00764 break;
00765 default:
00766 break;
00767 }
00768
00769 dst_word_index = next_word_index;
00770 }
00771 else
00772 {
00773
00774 if (stat != STATUS_TAIL)
00775 {
00776 dst_word_index = dst_words_end;
00777 stat = STATUS_TAIL;
00778 words_queue.clear();
00779
00780
00781 cur_word = first_matched_word;
00782 }
00783 }
00784
00785
00786 cur_word--;
00787 if (cur_word >= 0)
00788 {
00789 ctx.char_cursor = words->get(cur_word)->getLength() - 1;
00790 }
00791 }
00792 }
00793 else
00794 {
00795 while (cur_word >= 0)
00796 {
00797
00798 word = words->get(cur_word);
00799
00800 if (word == 0)
00801 {
00802 ERRORPRINTF("Null word in search");
00803 break;
00804 }
00805
00806
00807 string word_str;
00808 get_std_string_from_text_word(word, word_str);
00809
00810 if (word_str.empty())
00811 {
00812 ERRORPRINTF("Empty word in search");
00813 break;
00814 }
00815
00816 int start_result_idx = 0;
00817 if (compare_string(ctx.dst_words[0]
00818 , word_str
00819 , ctx.case_sensitive
00820 , ctx.match_whole_word
00821 , false
00822 , false
00823 , ctx.char_cursor
00824 , start_result_idx))
00825 {
00826
00827
00828 words_queue.add(SearchWordRecord(cur_word
00829 , start_result_idx
00830 , start_result_idx + static_cast<int>(ctx.dst_words[0].size()) - 1));
00831 break;
00832 }
00833
00834 cur_word--;
00835 if (cur_word >= 0)
00836 {
00837 ctx.char_cursor = words->get(cur_word)->getLength() - 1;
00838 }
00839 }
00840 }
00841
00842
00843 if (words_queue.get_count() == static_cast<int>(ctx.dst_words.size()))
00844 {
00845 generate_search_result(ctx, words_queue, result, false);
00846 }
00847
00848 return result;
00849 }
00850
00851 void PDFPage::generate_search_result(SearchContext &ctx
00852 , SearchWords &queue
00853 , PluginRangeImpl* &result
00854 , bool forward)
00855 {
00856 if (queue.get_count() == 0)
00857 {
00858 return;
00859 }
00860
00861 result = new PluginRangeImpl;
00862 SearchWordRecord begin;
00863 SearchWordRecord end;
00864 if (forward)
00865 {
00866 begin = queue.front();
00867 end = queue.back();
00868 }
00869 else
00870 {
00871 begin = queue.back();
00872 end = queue.front();
00873 }
00874
00875 int idx_start = begin.start_char_index;
00876 int idx_end = end.end_char_index;
00877 int word_start = begin.word_index;
00878 int word_end = end.word_index;
00879
00880 if (forward)
00881 {
00882
00883 ctx.word_cursor = word_end;
00884 ctx.char_cursor = idx_end;
00885 }
00886 else
00887 {
00888
00889 ctx.word_cursor = word_start;
00890 ctx.char_cursor = idx_start;
00891 }
00892
00893 PDFAnchor param;
00894 param.page_num = page_number;
00895 param.word_num = word_start;
00896 param.char_idx = idx_start;
00897
00898 result->start_anchor = new StringImpl(param.get_string());
00899
00900 param.char_idx = idx_end;
00901 param.word_num = word_end;
00902 result->end_anchor = new StringImpl(param.get_string());
00903 }
00904
00905 bool PDFPage::compare_string(const string &dst
00906 , const string &src
00907 , bool case_sensitive
00908 , bool match_whole_word
00909 , bool forward
00910 , bool sub_string
00911 , const int start_char_idx
00912 , int &start_result_idx)
00913 {
00914 string dst_str = dst;
00915 string src_str = src;
00916
00917 if (!case_sensitive)
00918 {
00919
00920 std::transform(dst_str.begin(), dst_str.end(), dst_str.begin(), (int(*)(int))tolower);
00921
00922 std::transform(src_str.begin(), src_str.end(), src_str.begin(), (int(*)(int))tolower);
00923 }
00924
00925 if (match_whole_word)
00926 {
00927
00928 if (dst_str.size() != src_str.size())
00929 {
00930 return false;
00931 }
00932
00933 return dst_str == src_str;
00934 }
00935
00936 if (forward)
00937 {
00938
00939 if (src.size() - start_char_idx < dst_str.size())
00940 {
00941 return false;
00942 }
00943
00944 if (sub_string)
00945 {
00946
00947
00948 src_str = src_str.substr(start_char_idx, dst_str.size());
00949
00950 if (src_str == dst_str)
00951 {
00952
00953 start_result_idx = start_char_idx;
00954 }
00955 else
00956 {
00957
00958 start_result_idx = static_cast<int>(src_str.npos);
00959 }
00960 }
00961 else
00962 {
00963 start_result_idx = static_cast<int>(src_str.find(dst_str
00964 , static_cast<size_t>(start_char_idx)));
00965 }
00966 }
00967 else
00968 {
00969
00970 if (start_char_idx - static_cast<int>(dst_str.size()) + 1 < 0)
00971 {
00972 return false;
00973 }
00974
00975 if (sub_string)
00976 {
00977
00978 int pos = start_char_idx - static_cast<int>(dst_str.size()) + 1;
00979
00980 if (start_char_idx >= static_cast<int>(src_str.size()))
00981 {
00982 return false;
00983 }
00984
00985
00986 src_str = src_str.substr(pos, start_char_idx + 1);
00987
00988 if (src_str == dst_str)
00989 {
00990
00991 start_result_idx = pos;
00992 }
00993 else
00994 {
00995
00996 start_result_idx = static_cast<int>(src_str.npos);
00997 }
00998 }
00999 else
01000 {
01001 start_result_idx = static_cast<int>(src_str.rfind(dst_str
01002 , static_cast<size_t>(start_char_idx)));
01003 }
01004 }
01005
01006 if (static_cast<size_t>(start_result_idx) != src_str.npos)
01007 {
01008 return true;
01009 }
01010
01011 return false;
01012 }
01013
01014 bool merge_rectangle(const double x_min, const double y_min,
01015 const double x_max, const double y_max,
01016 PDFRectangle *rect)
01017 {
01018 if (!rect->isValid())
01019 {
01020
01021 rect->x1 = x_min;
01022 rect->x2 = x_max;
01023 rect->y1 = y_min;
01024 rect->y2 = y_max;
01025 return true;
01026 }
01027
01028 if (fabs(y_min - rect->y1) < ZERO_RANGE
01029 && fabs(y_max - rect->y2) < ZERO_RANGE)
01030 {
01031 rect->x1 = min(x_min, rect->x1);
01032 rect->x2 = max(x_max, rect->x2);
01033 return true;
01034 }
01035
01036 return false;
01037 }
01038
01039 bool PDFPage::get_bounding_rectangles(const string &start_anchor
01040 , const string &end_anchor
01041 , PDFRectangles &rects)
01042 {
01043 PDFAnchor start_param(start_anchor);
01044 PDFAnchor end_param(end_anchor);
01045
01046
01047 if (start_param.page_num != page_number ||
01048 start_param.page_num != end_param.page_num)
01049 {
01050 return false;
01051 }
01052
01053
01054
01055
01056
01057
01058 PDFRectangle pdf_rect;
01059 if (start_param.word_num >= 0 && end_param.word_num >= 0)
01060 {
01061 TextWordList * words = get_words_list();
01062 for(int i = start_param.word_num; i <= end_param.word_num; ++i)
01063 {
01064 double x_min = 0.0, y_min = 0.0, x_max = 0.0, y_max = 0.0;
01065 words->get(i)->getBBox(&x_min, &y_min, &x_max, &y_max);
01066
01067 if (!merge_rectangle(x_min, y_min, x_max, y_max, &pdf_rect))
01068 {
01069 PluginRectangle rect;
01070 rect.x = static_cast<int>(pdf_rect.x1);
01071 rect.y = static_cast<int>(pdf_rect.y1);
01072 rect.width = static_cast<int>(pdf_rect.x2 - pdf_rect.x1) + 1;
01073 rect.height = static_cast<int>(pdf_rect.y2 - pdf_rect.y1) + 1;
01074 rects.add(rect);
01075
01076
01077 pdf_rect.x1 = x_min;
01078 pdf_rect.x2 = x_max;
01079 pdf_rect.y1 = y_min;
01080 pdf_rect.y2 = y_max;
01081 }
01082 }
01083
01084 delete words;
01085
01086 if (pdf_rect.isValid())
01087 {
01088
01089 PluginRectangle rect;
01090 rect.x = static_cast<int>(pdf_rect.x1);
01091 rect.y = static_cast<int>(pdf_rect.y1);
01092 rect.width = static_cast<int>(pdf_rect.x2 - pdf_rect.x1) + 1;
01093 rect.height = static_cast<int>(pdf_rect.y2 - pdf_rect.y1) + 1;
01094 rects.add(rect);
01095 }
01096 }
01097 else if (start_param.link_idx >= 0 && end_param.link_idx >= 0)
01098 {
01099 if (links == 0)
01100 {
01101 return false;
01102 }
01103
01104 for(int i = start_param.link_idx; i <= end_param.link_idx; ++i)
01105 {
01106 double x_min = 0.0, y_min = 0.0, x_max = 0.0, y_max = 0.0;
01107 Link *link = links->getLink(i);
01108 link->getRect(&x_min, &y_min, &x_max, &y_max);
01109 int real_x_min, real_y_min, real_x_max, real_y_max;
01110 coordinates_user_to_dev(x_min, y_min, &real_x_min, &real_y_min);
01111 coordinates_user_to_dev(x_max, y_max, &real_x_max, &real_y_max);
01112 x_min = min(real_x_min, real_x_max);
01113 x_max = max(real_x_min, real_x_max);
01114 y_min = min(real_y_min, real_y_max);
01115 y_max = max(real_y_min, real_y_max);
01116
01117 if (!merge_rectangle(x_min, y_min, x_max, y_max, &pdf_rect))
01118 {
01119 PluginRectangle rect;
01120 rect.x = static_cast<int>(pdf_rect.x1);
01121 rect.y = static_cast<int>(pdf_rect.y1);
01122 rect.width = static_cast<int>(pdf_rect.x2 - pdf_rect.x1) + 1;
01123 rect.height = static_cast<int>(pdf_rect.y2 - pdf_rect.y1) + 1;
01124 rects.add(rect);
01125
01126
01127 pdf_rect.x1 = x_min;
01128 pdf_rect.x2 = x_max;
01129 pdf_rect.y1 = y_min;
01130 pdf_rect.y2 = y_max;
01131 }
01132
01133 }
01134
01135 if (pdf_rect.isValid())
01136 {
01137
01138 PluginRectangle rect;
01139 rect.x = static_cast<int>(pdf_rect.x1);
01140 rect.y = static_cast<int>(pdf_rect.y1);
01141 rect.width = static_cast<int>(pdf_rect.x2 - pdf_rect.x1) + 1;
01142 rect.height = static_cast<int>(pdf_rect.y2 - pdf_rect.y1) + 1;
01143 rects.add(rect);
01144 }
01145 }
01146
01147 return true;
01148 }
01149
01150 bool PDFPage::render_splash_map(PDFRenderer *renderer, void *abort_data)
01151 {
01152 if (locked())
01153 {
01154 if (get_render_status() == RENDER_STOP)
01155 {
01156
01157
01158 unlock();
01159 }
01160 else
01161 {
01162
01163 LOGPRINTF("Locked, Cannot render\n");
01164 return false;
01165 }
01166 }
01167
01168
01169 destroy_links();
01170 doc_controller->update_memory_usage((-1) * destroy_bitmap());
01171
01172
01173 set_render_status(RENDER_RUNNING);
01174
01175
01176 RenderRet ret = Render_Error;
01177 SplashBitmap *b = 0;
01178 Links *l = 0;
01179
01180
01181 ScopeMutex m(&(renderer->get_render_mutex()));
01182
01183 ret = doc_controller->get_pdf_doc()->displayPage(
01184 renderer->get_splash_output_dev()
01185 , page_number
01186 , render_attr.get_real_zoom_value() * 0.01 * renderer->get_view_attr().get_device_dpi_h()
01187 , render_attr.get_real_zoom_value() * 0.01 * renderer->get_view_attr().get_device_dpi_v()
01188 , render_attr.get_rotate()
01189 , gFalse
01190 , gTrue
01191 , gTrue
01192 , abort_render_check
01193 , abort_data
01194 );
01195
01196 if (ret == Render_Error || ret == Render_Invalid)
01197 {
01198 LOGPRINTF("1. Error in rendering page:%d\n", get_page_num());
01199 return false;
01200 }
01201
01202
01203 b = renderer->get_splash_output_dev()->takeBitmap();
01204
01205
01206 #ifdef WIN32
01207 l = doc_controller->get_pdf_doc()->takeLinks();
01208 #else
01209 l = doc_controller->get_pdf_doc()->getLinks(page_number);
01210 #endif
01211
01212 if (ret == Render_Done)
01213 {
01214 update_bitmap(b);
01215 update_links(l);
01216
01217
01218 memcpy(ctm, renderer->get_splash_output_dev()->getDefCTM(), 6 * sizeof(double));
01219 memcpy(ictm, renderer->get_splash_output_dev()->getDefICTM(), 6 * sizeof(double));
01220
01221 doc_controller->update_memory_usage(length());
01222 LOGPRINTF("Rendering of page:%d Done! Length:%d\n", get_page_num(), length());
01223 return true;
01224 }
01225 else if (ret == Render_Abort)
01226 {
01227
01228 delete b;
01229 delete l;
01230 set_render_status(RENDER_STOP);
01231 LOGPRINTF("Rendering of page:%d is aborted! Task:%p\n", get_page_num(), abort_data);
01232 return false;
01233 }
01234
01235 LOGPRINTF("2. Error in rendering page:%d\n", get_page_num());
01236 return false;
01237 }
01238
01239 void PDFPage::set_render_status(RenderStatus s)
01240 {
01241 render_status = s;
01242 }
01243
01244 bool PDFPage::render_text(PDFRenderer *renderer, bool use_defalt_setting)
01245 {
01246 static const double DEFAULT_ZOOM = 0.2f;
01247
01248 destroy_text();
01249
01250
01251
01252 ScopeMutex m(&(renderer->get_render_mutex()));
01253
01254 doc_controller->get_pdf_doc()->displayPage(
01255 renderer->get_text_output_dev()
01256 , page_number
01257 , (use_defalt_setting ? DEFAULT_ZOOM : render_attr.get_real_zoom_value() * 0.01) *
01258 renderer->get_view_attr().get_device_dpi_h()
01259 , (use_defalt_setting ? DEFAULT_ZOOM : render_attr.get_real_zoom_value() * 0.01) *
01260 renderer->get_view_attr().get_device_dpi_v()
01261 , render_attr.get_rotate()
01262 , gFalse
01263 , gTrue
01264 , gFalse
01265 );
01266
01267 update_text(renderer->get_text_output_dev()->takeText());
01268
01269 return true;
01270 }
01271
01272 bool PDFPage::get_content_area(PDFRenderer *renderer, RenderArea &area)
01273 {
01274 static const double SHRINK_ZOOM = 0.2f;
01275 static const int EXPAND_STEP = 2;
01276
01277 if (!is_render_area_valid(content_area))
01278 {
01279
01280 ScopeMutex m(&(renderer->get_render_mutex()));
01281
01282 RenderRet ret = doc_controller->get_pdf_doc()->displayPage(
01283 renderer->get_thumbnail_output_dev()
01284 , get_page_num()
01285 , SHRINK_ZOOM * renderer->get_view_attr().get_device_dpi_h()
01286 , SHRINK_ZOOM * renderer->get_view_attr().get_device_dpi_v()
01287 , 0
01288 , gFalse
01289 , gFalse
01290 , gFalse
01291 );
01292
01293 if (ret == Render_Error || ret == Render_Invalid)
01294 {
01295 ERRORPRINTF("Error in rendering thumbnail page:%d\n", get_page_num());
01296 return false;
01297 }
01298
01299 SplashBitmap *thumb_map = renderer->get_thumbnail_output_dev()->takeBitmap();
01300 PDFRectangle content_rect;
01301 bool succeed = get_content_from_bitmap(thumb_map, content_rect);
01302
01303 double page_width = thumb_map->getWidth();
01304 double page_height = thumb_map->getHeight();
01305 delete thumb_map;
01306 if (!succeed)
01307 {
01308
01309 content_rect.x1 = content_rect.y1 = 0;
01310 content_rect.x2 = page_width;
01311 content_rect.y2 = page_height;
01312 }
01313 else
01314 {
01315
01316 double inc_x2 = 0;
01317 double inc_y2 = 0;
01318
01319
01320 if (content_rect.x1 > EXPAND_STEP)
01321 {
01322 content_rect.x1 -= EXPAND_STEP;
01323 inc_x2 = EXPAND_STEP;
01324 }
01325 else
01326 {
01327 inc_x2 = content_rect.x1;
01328 content_rect.x1 = 0;
01329 }
01330
01331
01332 if (content_rect.y1 > EXPAND_STEP)
01333 {
01334 content_rect.y1 -= EXPAND_STEP;
01335 inc_y2 = EXPAND_STEP;
01336 }
01337 else
01338 {
01339 inc_y2 = content_rect.y1;
01340 content_rect.y1 = 0;
01341 }
01342
01343
01344 content_rect.x2 += (inc_x2 + 1);
01345 if (content_rect.x2 > page_width)
01346 {
01347 content_rect.x2 = page_width;
01348 }
01349
01350
01351 content_rect.y2 += (inc_y2 + 1);
01352 if (content_rect.y2 > page_height)
01353 {
01354 content_rect.y2 = page_height;
01355 }
01356 }
01357
01358 content_area.x_offset = static_cast<float>(content_rect.x1 / page_width);
01359 content_area.y_offset = static_cast<float>(content_rect.y1 / page_height);
01360
01361 content_area.width =
01362 static_cast<float>((content_rect.x2 - content_rect.x1) / page_width);
01363 content_area.height =
01364 static_cast<float>((content_rect.y2 - content_rect.y1) / page_height);
01365
01366 if (content_area.width > 1.0f)
01367 {
01368 content_area.x_offset = 0.0f;
01369 content_area.width = 1.0f;
01370 }
01371 if (content_area.height > 1.0f)
01372 {
01373 content_area.y_offset = 0.0f;
01374 content_area.height = 1.0f;
01375 }
01376 }
01377
01378 area = content_area;
01379
01380 return true;
01381 }
01382
01383 bool PDFPage::get_content_from_bitmap(SplashBitmap *bitmap, PDFRectangle &rect)
01384 {
01385 static const int BACKGROUND_COLOR = 255;
01386 static const int LINE_STEP = 1;
01387 static const int SHRINK_STEP = 1;
01388 static const double SHRINK_RANGE = 0.3f;
01389
01390
01391 int x1 = 0;
01392 int y1 = 0;
01393
01394 int x2 = bitmap->getWidth();
01395 int y2 = bitmap->getHeight();
01396
01397 int left_edge = static_cast<int>(SHRINK_RANGE * x2);
01398 int right_edge = static_cast<int>((1.0f - SHRINK_RANGE) * x2);
01399 int top_edge = static_cast<int>(SHRINK_RANGE * y2);
01400 int bottom_edge = static_cast<int>((1.0f - SHRINK_RANGE) * y2);
01401
01402
01403 Guchar cur_pix;
01404 bool stop[4] = {false, false, false, false};
01405
01406 while (!stop[0] || !stop[1] || !stop[2] || !stop[3])
01407 {
01408
01409 int x_cur = x1;
01410 while (x_cur < x2 && !stop[0])
01411 {
01412 bitmap->getPixel(x_cur, y1, &cur_pix);
01413 if (cur_pix != BACKGROUND_COLOR)
01414 {
01415 stop[0] = true;
01416 break;
01417 }
01418 x_cur += LINE_STEP;
01419 }
01420
01421
01422 x_cur = x1;
01423 while (x_cur < x2 && !stop[1])
01424 {
01425 bitmap->getPixel(x_cur, y2, &cur_pix);
01426 if (cur_pix != BACKGROUND_COLOR)
01427 {
01428 stop[1] = true;
01429 break;
01430 }
01431 x_cur += LINE_STEP;
01432 }
01433
01434
01435 int y_cur = y1;
01436 while (y_cur < y2 && !stop[2])
01437 {
01438 bitmap->getPixel(x1, y_cur, &cur_pix);
01439 if (cur_pix != BACKGROUND_COLOR)
01440 {
01441 stop[2] = true;
01442 break;
01443 }
01444 y_cur += LINE_STEP;
01445 }
01446
01447
01448 y_cur = y1;
01449 while (y_cur < y2 && !stop[3])
01450 {
01451 bitmap->getPixel(x2, y_cur, &cur_pix);
01452 if (cur_pix != BACKGROUND_COLOR)
01453 {
01454 stop[3] = true;
01455 break;
01456 }
01457 y_cur += LINE_STEP;
01458 }
01459
01460
01461 if (!stop[2])
01462 {
01463 if (x1 >= left_edge)
01464 {
01465 stop[2] = true;
01466 }
01467 else
01468 {
01469 x1 += SHRINK_STEP;
01470 }
01471 }
01472
01473 if (!stop[3])
01474 {
01475 if (x2 <= right_edge)
01476 {
01477 stop[3] = true;
01478 }
01479 else
01480 {
01481 x2 -= SHRINK_STEP;
01482 }
01483 }
01484
01485 if (!stop[0])
01486 {
01487 if (y1 >= top_edge)
01488 {
01489 stop[0] = true;
01490 }
01491 else
01492 {
01493 y1 += SHRINK_STEP;
01494 }
01495 }
01496
01497 if (!stop[1])
01498 {
01499 if (y2 <= bottom_edge)
01500 {
01501 stop[1] = true;
01502 }
01503 else
01504 {
01505 y2 -= SHRINK_STEP;
01506 }
01507 }
01508
01509 }
01510
01511 if (stop[0] && stop[1] && stop[2] && stop[3])
01512 {
01513 rect.x1 = x1;
01514 rect.x2 = x2;
01515 rect.y1 = y1;
01516 rect.y2 = y2;
01517 return true;
01518 }
01519
01520 return false;
01521 }
01522
01523 GBool PDFPage::abort_render_check(void *data)
01524 {
01525 Task *task = static_cast<Task*>(data);
01526
01527 return static_cast<GBool>(task->is_aborted());
01528 }
01529
01530
01531 void PDFPage::get_anchor_param_from_coordinates(double x, double y
01532 , PDFAnchor ¶m)
01533 {
01534 int i;
01535
01536 int link_index = -1;
01537 if (links && links->onLink(x, y))
01538 {
01539 int link_num;
01540 Link * link;
01541
01542 link_num = links->getNumLinks();
01543 for (i = 0; i < link_num; i++)
01544 {
01545 link = links->getLink(i);
01546 if (link && link->inRect(x, y))
01547 {
01548 link_index = i;
01549 break;
01550 }
01551 }
01552 }
01553
01554
01555
01556
01557
01558
01559 int dx, dy;
01560 coordinates_user_to_dev(x, y, &dx, &dy);
01561
01562 int word_index = -1, char_index = -1;
01563
01564 TextWordList * words = get_words_list();
01565 if (words != 0)
01566 {
01567 int words_num = words->getLength();
01568 TextWord * word = 0;
01569 double x_min = 0.0, y_min = 0.0, x_max = 0.0, y_max = 0.0;
01570 for(i = 0; i < words_num; i++)
01571 {
01572 word = words->get(i);
01573 word->getBBox(&x_min, &y_min, &x_max, &y_max);
01574
01575 if ((x_min <= dx) && (dx <= x_max)
01576 && (y_min <= dy) && (dy <= y_max))
01577 {
01578 word_index = i;
01579
01580 int chars_num = word->getLength();
01581 for (int j = 0; j < chars_num; j++)
01582 {
01583 #ifdef WIN32
01584 x_min = word->getEdge(j);
01585 x_max = word->getEdge(j+1);
01586 #else
01587 word->getCharBBox(j, &x_min, &y_min, &x_max, &y_max);
01588 #endif
01589 if ((x_min <= dx) && (dx <= x_max)
01590 && (y_min <= dy) && (dy <= y_max))
01591 {
01592 char_index = j;
01593 break;
01594 }
01595 }
01596 break;
01597 }
01598 }
01599
01600 delete words;
01601 }
01602
01603
01604 param.page_num = page_number;
01605 param.link_idx = link_index;
01606 param.word_num = word_index;
01607 param.char_idx = char_index;
01608
01609 }
01610
01611 bool PDFPage::get_range_param_by_word_index(const int word_index,
01612 PDFAnchor & start_param,
01613 PDFAnchor & end_param)
01614 {
01615 TextWordList * words = get_words_list();
01616 bool ret = false;
01617 if (words != 0)
01618 {
01619 int words_num = words->getLength();
01620 if (word_index >= 0 && word_index < words_num)
01621 {
01622 TextWord * word = words->get(word_index);
01623
01624 start_param.page_num = page_number;
01625 start_param.word_num = word_index;
01626 start_param.char_idx = 0;
01627
01628
01629 end_param.page_num = page_number;
01630 end_param.word_num = word_index;
01631 end_param.char_idx = word->getLength();
01632
01633
01634 ret = true;
01635 }
01636 delete words;
01637 }
01638 return ret;
01639 }
01640
01641 bool PDFPage::get_text_by_range(const PDFAnchor & start_param,
01642 const PDFAnchor & end_param,
01643 std::string &result)
01644 {
01645 if (!end_param.is_end_anchor() && start_param.page_num != end_param.page_num)
01646 {
01647
01648 return false;
01649 }
01650
01651 TextWordList * words = get_words_list();
01652 result.clear();
01653 if (words != 0)
01654 {
01655 int words_num = words->getLength();
01656 int start_index = start_param.word_num;
01657 int end_index = end_param.is_end_anchor() ? words_num : end_param.word_num;
01658
01659
01660
01661 TextWord * word;
01662 string text;
01663
01664 for (int idx = start_index; idx < end_index; ++idx)
01665 {
01666 word = words->get(idx);
01667 if (word == 0)
01668 {
01669
01670 ERRORPRINTF("Null word in search");
01671 break;
01672 }
01673
01674 get_std_string_from_text_word(word, text);
01675 result += text;
01676
01677 result += " ";
01678 }
01679
01680
01681 if (start_index <= end_index)
01682 {
01683 word = words->get(end_index);
01684 if (word)
01685 {
01686 get_std_string_from_text_word(word, text);
01687 result += text;
01688 }
01689 }
01690
01691 delete words;
01692 }
01693
01694 return true;
01695 }
01696
01697 bool PDFPage::get_range_param_by_link_index(const int link_index,
01698 PDFAnchor & start_param,
01699 PDFAnchor & end_param)
01700 {
01701 if (links)
01702 {
01703
01704 if (link_index < 0 || link_index >= links->getNumLinks())
01705 {
01706 ERRORPRINTF("Error Link Index");
01707 return false;
01708 }
01709
01710
01711
01712
01713
01714
01715
01716
01717
01718
01719
01720
01721
01722
01723
01724
01725
01726 start_param.page_num = page_number;
01727 start_param.link_idx = link_index;
01728
01729
01730 end_param.page_num = page_number;
01731 end_param.link_idx = link_index;
01732
01733 return true;
01734 }
01735
01736 return false;
01737 }
01738
01739 int PDFPage::get_goto_page_of_link(int link_index)
01740 {
01741 if (links == 0)
01742 {
01743 return 0;
01744 }
01745
01746
01747 if (link_index < 0 || link_index >= links->getNumLinks())
01748 {
01749 ERRORPRINTF("Error Link Index");
01750 return 0;
01751 }
01752
01753 Link *link = links->getLink(link_index);
01754 int page_num = 0;
01755 if (link != 0)
01756 {
01757 LinkAction *action = link->getAction();
01758 LinkActionKind kind = action->getKind();
01759 if (kind == actionGoTo)
01760 {
01761
01762 #ifdef WIN32
01763 UGooString *named_dest = 0;
01764 #else
01765 GooString *named_dest = 0;
01766 #endif
01767 PDFDoc *doc = doc_controller->get_pdf_doc();
01768
01769 LinkDest *dest = ((LinkGoTo *)action)->getDest();
01770 named_dest = ((LinkGoTo *)action)->getNamedDest();
01771
01772 if (dest && dest->isPageRef())
01773 {
01774 Ref pageRef = dest->getPageRef();
01775 page_num = doc->findPage(pageRef.num, pageRef.gen);
01776 }
01777 else if (named_dest)
01778 {
01779 dest = doc->findDest(named_dest);
01780 if (dest)
01781 {
01782 Ref pageRef = dest->getPageRef();
01783 page_num = doc->findPage(pageRef.num, pageRef.gen);
01784 }
01785 }
01786 }
01787 }
01788 return page_num;
01789 }
01790
01791 bool PDFPage::is_hyper_linked_page(int dst_page_num)
01792 {
01793 if (links == 0)
01794 {
01795 return false;
01796 }
01797
01798 int link_num = links->getNumLinks();
01799 if (link_num <= 0)
01800 {
01801 return false;
01802 }
01803
01804 for (int i = 0; i < link_num; ++i)
01805 {
01806 if (dst_page_num == get_goto_page_of_link(i))
01807 {
01808 return true;
01809 }
01810 }
01811
01812 return false;
01813 }
01814
01815
01816 bool PDFPage::get_goto_anchor_of_link(int link_index, std::string & anchor)
01817 {
01818
01819 int page_num = get_goto_page_of_link(link_index);
01820 if (page_num > 0)
01821 {
01822 PDFAnchor param;
01823 param.page_num = page_num;
01824
01825 anchor = param.get_string();
01826 return true;
01827 }
01828
01829 return false;
01830 }
01831
01832 void PDFPage::coordinates_dev_to_user(const double dx, const double dy,
01833 double * ux, double *uy)
01834 {
01835 *ux = ictm[0] * dx + ictm[2] * dy + ictm[4];
01836 *uy = ictm[1] * dx + ictm[3] * dy + ictm[5];
01837 }
01838
01839 void PDFPage::coordinates_user_to_dev(const double ux, const double uy,
01840 int * dx, int *dy)
01841 {
01842 *dx = (int)(ctm[0] * ux + ctm[2] * uy + ctm[4] + 0.5);
01843 *dy = (int)(ctm[1] * ux + ctm[3] * uy + ctm[5] + 0.5);
01844 }
01845
01846 }
01847
01848