pdf_doc_controller.cpp

Go to the documentation of this file.
00001 /*
00002  * File Name: pdf_doc_controller.cpp
00003  */
00004 
00005 /*
00006  * This file is part of uds-plugin-pdf.
00007  *
00008  * uds-plugin-pdf is free software: you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation, either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * uds-plugin-pdf is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program. If not, see <http://www.gnu.org/licenses/>.
00020  */
00021 
00022 /**
00023  * Copyright (C) 2008 iRex Technologies B.V.
00024  * All rights reserved.
00025  */
00026 
00027 #include "pdf_library.h"
00028 #include "pdf_doc_controller.h"
00029 #include "pdf_render_task.h"
00030 #include "pdf_search_task.h"
00031 #include "pdf_anchor.h"
00032 
#if defined(WIN32)
#include "poppler/SecurityHandler.h"

// Windows-only definitions.
// NOTE(review): presumably these exist to satisfy references made by the
// poppler library on Windows builds -- confirm against the build setup.

// Error hook that ignores its arguments and does nothing.
void CDECL error(int pos, char *msg, ...)
{
}

// Always reports that no authentication data is available (null pointer).
void * StandardSecurityHandler::getAuthData()
{
    return 0;
}
#endif
00043 
00044 namespace pdf
00045 {
00046 
00047 // Wrap the global parameters, it is a sigleton class
00048 class PDFGlobalParams
00049 {
00050 public:
00051     ~PDFGlobalParams()
00052     {
00053         // Global variables will be destoried automatically.
00054         delete globalParams;
00055         globalParams = 0;
00056     }
00057 
00058     static PDFGlobalParams & make_instance()
00059     {
00060         static PDFGlobalParams params;
00061         return params;
00062     }
00063 
00064 private:
00065     PDFGlobalParams()
00066     {
00067         //Initialize the global params
00068         // Notes: globalParams is defined in poppler library.
00069 #ifdef WIN32
00070         globalParams = new GlobalParams("");
00071 #else        
00072         globalParams = new GlobalParams();
00073 #endif       
00074     }
00075 
00076     PDFGlobalParams(const PDFGlobalParams&);
00077 };
00078 
00079 PDFController::PDFController(void)
00080 : pages_cache()
00081 , pdf_doc(0)
00082 , toc(this)
00083 , renderer()
00084 , current_page_num(1)
00085 , searcher(this)
00086 , file_name()
00087 , prerender_policy(new PDFPrerenderPolicyNormal)
00088 {
00089     PDFLibrary::instance().add_document(this);
00090 }
00091 
00092 PDFController::~PDFController(void)
00093 {
00094     PDFLibrary::instance().remove_document(this);
00095 
00096     // close the previous document if it exists
00097     if (pdf_doc)
00098     {
00099         close();
00100     }
00101 
00102     delete prerender_policy;
00103 }
00104 
00105 PluginStatus PDFController::open(const string &path)
00106 {
00107     // close the previous document if it exists
00108     if (pdf_doc)
00109     {
00110         close();
00111     }
00112 
00113     // initialize the pdf global parameters
00114     PDFGlobalParams::make_instance();
00115 
00116     GooString * name = new GooString(path.c_str());
00117 
00118     // create PDFDoc instance
00119     pdf_doc = new PDFDoc(name);
00120 
00121     if (!pdf_doc->isOk())
00122     {
00123         ERRORPRINTF("could not open poppler doc %s", path.c_str());
00124         return PLUGIN_ERROR_OPEN_FILE;
00125     }
00126 
00127     if (!renderer.initialize(this))
00128     {
00129         return PLUGIN_ERROR_OPEN_FILE;
00130     }
00131 
00132     // set the file name
00133     file_name = path;
00134     return PLUGIN_OK;
00135 }
00136 
00137 bool PDFController::close()
00138 {
00139     // remove all of the tasks related to this document
00140     PDFLibrary::instance().remove_tasks_by_document(this);
00141 
00142     renderer.destroy();
00143 
00144     if (pdf_doc != 0)
00145     {
00146         delete pdf_doc;
00147         pdf_doc = 0;
00148     }
00149 
00150     return true;
00151 }
00152 
00153 PDFToc * PDFController::get_toc(void)
00154 {
00155     return &toc;
00156 }
00157 
00158 unsigned int PDFController::page_count()
00159 {
00160     if (pdf_doc == 0)
00161     {
00162         return 0;
00163     }
00164 
00165     return pdf_doc->getNumPages();
00166 }
00167 
00168 bool PDFController::get_anchor_of_page(const unsigned int page_number
00169                                        , string &anchor)
00170 {
00171     PDFAnchor param;
00172     param.page_num = page_number;
00173     //param.file_name = name();
00174 
00175     anchor = param.get_string();
00176     return true;
00177 }
00178 
00179 unsigned int PDFController::get_page_number_of_anchor(const string &anchor)
00180 {
00181     PDFAnchor param(anchor);
00182     return static_cast<unsigned int>(param.page_num);
00183 }
00184 
00185 PagePtr PDFController::get_page(const int page_num)
00186 {
00187     return pages_cache.get_page(page_num);
00188 }
00189 
00190 int PDFController::compare_anchor(const string & first,
00191                    const string & second)
00192 {
00193     PDFAnchor first_param(first);
00194     PDFAnchor second_param(second);
00195     return compare_anchor_param(first_param, second_param);
00196 }
00197 
00198 int PDFController::compare_anchor_param(const PDFAnchor & first_param,
00199                                         const PDFAnchor & second_param)
00200 {
00201     return PDFAnchor::compare(first_param, second_param);
00202 }
00203 
00204 bool PDFController::get_hyperlinks_in_page(int page_num,
00205                                            PDFAnchor *start_param,
00206                                            PDFAnchor *end_param,
00207                                            PDFRangeCollection &results)
00208 {
00209     PagePtr page = get_page(page_num);
00210     Links *links = 0;
00211     bool destroy_links = false;
00212     bool ret = true;
00213     if (page == 0 || page->get_links() == 0)
00214     {
00215         links = pdf_doc->generateLinks(page_num);
00216         destroy_links = true;
00217     }
00218     else
00219     {
00220         links = page->get_links();
00221     }
00222 
00223     if (links->getNumLinks() <= 0)
00224     {
00225         ret = false;
00226     }
00227 
00228     // get all of the hyperlinks from this page
00229     for (int i = 0; i < links->getNumLinks(); ++i)
00230     {
00231         PDFAnchor link_start, link_end;
00232         link_start.page_num = link_end.page_num = page_num;
00233         link_start.link_idx = link_end.link_idx = i;
00234         //link_start.file_name = link_end.file_name = name();
00235 
00236         int comp_ret_start = start_param != 0 ? PDFAnchor::compare(link_start, *start_param) :
00237             1;
00238         int comp_ret_end = end_param != 0 ? PDFAnchor::compare(link_end, *end_param) :
00239             1;
00240 
00241         if (comp_ret_start == 1 && comp_ret_end == 1)
00242         {
00243             PluginRangeImpl *result = new PluginRangeImpl;
00244             result->start_anchor = new StringImpl(link_start.get_string());
00245             result->end_anchor = new StringImpl(link_end.get_string());
00246             results.add(result);
00247         }
00248     }
00249 
00250     if (destroy_links)
00251     {
00252         delete links;
00253         links = 0;
00254     }
00255     return ret;
00256 }
00257 
00258 PDFCollectionBase* PDFController::get_hyperlinks_from_range(const string &start,
00259                                                             const string &end)
00260 {
00261     PDFAnchor start_param(start), end_param(end);
00262 
00263     PDFRangeCollection *results = new PDFRangeCollection;
00264     if (start_param.page_num == end_param.page_num)
00265     {
00266         get_hyperlinks_in_page(start_param.page_num, &start_param, &end_param
00267             , *results);
00268         return results;
00269     }
00270 
00271     get_hyperlinks_in_page(start_param.page_num, &start_param, 0, *results);
00272 
00273     if (end_param.is_end_anchor())
00274     {
00275         return results;
00276     }
00277     
00278     int cur_page_num = start_param.page_num + 1;
00279     while (cur_page_num < end_param.page_num)
00280     {
00281         get_hyperlinks_in_page(cur_page_num, 0, 0, *results);
00282         cur_page_num ++;
00283     }
00284     
00285     get_hyperlinks_in_page(cur_page_num, 0, &end_param, *results);
00286 
00287     return results;
00288 }
00289 
00290 bool PDFController::get_text_from_range(const PDFAnchor &start,
00291                                         const PDFAnchor &end,
00292                                         string &result)
00293 {
00294     bool ret = false;
00295     result.clear();
00296 
00297     PagePtr page = 0;
00298     if ( start.page_num == end.page_num || end.is_end_anchor() )
00299     {
00300         page = get_page(start.page_num);
00301         ret = page->get_text_by_range(start, end, result);
00302     }
00303     else
00304     {
00305         // make an end anchor
00306         PDFAnchor end_anchor;
00307         end_anchor.set_end_anchor();
00308         string  text;
00309 
00310         // get text from start page
00311         page = get_page(start.page_num);
00312         ret = page->get_text_by_range(start, end_anchor, text);
00313         if (!ret)
00314         {
00315             return false;
00316         }
00317         result += text;
00318 
00319         // get text from middle pages
00320         for (int idx = (start.page_num + 1); idx < end.page_num; ++idx)
00321         {
00322             // make start anchor of current page
00323             PDFAnchor start_anchor;
00324             start_anchor.page_num = idx;
00325             start_anchor.word_num = 0;
00326 
00327             page = get_page(idx);
00328             ret = page->get_text_by_range(start_anchor, end_anchor, text);
00329             if (!ret)
00330             {
00331                 break;
00332             }
00333 
00334             result += text;
00335         }
00336 
00337         // get text from the last page
00338         PDFAnchor start_anchor;
00339         start_anchor.page_num = end.page_num;
00340         start_anchor.word_num = 0;
00341 
00342         page = get_page(end.page_num);
00343         ret = page->get_text_by_range(start_anchor, end, text);
00344         if (!ret)
00345         {
00346             return false;
00347         }
00348         result += text;
00349     }
00350     return ret;
00351 }
00352 
00353 bool PDFController::get_prev_page_anchor(string & anchor)
00354 {
00355     PDFAnchor current_page(anchor);
00356     int prev_page = current_page.page_num - 1;
00357     if (prev_page < 1)
00358     {
00359         return false;
00360     }
00361 
00362     current_page.page_num  = prev_page;
00363     //current_page.file_name = name();
00364     anchor = current_page.get_string();
00365     return true;
00366 }
00367 
00368 bool PDFController::get_next_page_anchor(string & anchor)
00369 {
00370     PDFAnchor current_page(anchor);
00371     int next_page = current_page.page_num + 1;
00372     if (next_page > static_cast<int>(page_count()))
00373     {
00374         return false;
00375     }
00376 
00377     current_page.page_num  = next_page;
00378     //current_page.file_name = name();
00379     anchor = current_page.get_string();
00380     return true;
00381 }
00382 
00383 bool PDFController::search_next(const PDFSearchCriteria &criteria
00384                                 , const string &from_anchor
00385                                 , unsigned int search_id)
00386 {
00387     PDFSearchTask *task = new PDFSearchTask(criteria, from_anchor
00388         , PDF_SEARCH_NEXT, &searcher, search_id);
00389 
00390     PDFLibrary::instance().thread_add_search_task(task);
00391 
00392     return true;
00393 }
00394 
00395 bool PDFController::search_all(const PDFSearchCriteria &criteria
00396                                , unsigned int search_id)
00397 {
00398     string s;
00399     PDFSearchTask *task = new PDFSearchTask(criteria, s
00400         , PDF_SEARCH_ALL, &searcher, search_id);
00401 
00402     PDFLibrary::instance().thread_add_search_task(task);
00403 
00404     return true;
00405 }
00406 
00407 bool PDFController::abort_search(unsigned int search_id)
00408 {
00409     return PDFLibrary::instance().get_thread().abort_task(this, TASK_SEARCH
00410         , search_id);
00411 }
00412 
00413 double PDFController::get_page_crop_width(const int page_num)
00414 {
00415     PluginRotationDegree rotation = get_page_original_rotation(page_num);
00416     int width = 0;
00417     double crop_width  = pdf_doc->getPageCropWidth(page_num);
00418     double crop_height = pdf_doc->getPageCropHeight(page_num);
00419 
00420     if (rotation == Clockwise_Degrees_90 ||
00421         rotation == Clockwise_Degrees_270  )
00422     {
00423         width = static_cast<int>(crop_height);
00424     }
00425     else
00426     {
00427         width = static_cast<int>(crop_width);
00428     }
00429     return width * renderer.get_view_attr().get_device_dpi_h() / 72.0f;
00430 }
00431 
00432 double PDFController::get_page_crop_height(const int page_num)
00433 {
00434     PluginRotationDegree rotation = get_page_original_rotation(page_num);
00435     int height = 0;
00436     double crop_width  = pdf_doc->getPageCropWidth(page_num);
00437     double crop_height = pdf_doc->getPageCropHeight(page_num);
00438 
00439     if (rotation == Clockwise_Degrees_90 ||
00440         rotation == Clockwise_Degrees_270  )
00441     {
00442         height = static_cast<int>(crop_width);
00443     }
00444     else
00445     {
00446         height = static_cast<int>(crop_height);
00447     }
00448     return height * renderer.get_view_attr().get_device_dpi_v() / 72.0f;
00449 }
00450 
00451 bool PDFController::get_page_content_area(const string &anchor, RenderArea &area)
00452 {
00453     int page_number = get_page_number_of_anchor(anchor);
00454 
00455     if (page_number <= 0 || page_number > static_cast<int>(page_count()))
00456     {
00457         // invalid page number
00458         ERRORPRINTF("Invalid Page Number when getting content area");
00459         return false;
00460     }
00461 
00462     PagePtr page = get_page(page_number);
00463 
00464     if (page == 0)
00465     {
00466         return false;
00467     }
00468 
00469     return page->get_content_area(&renderer, area);
00470 }
00471 
00472 bool PDFController::make_enough_memory(const int page_num, const int length)
00473 {
00474     return pages_cache.make_enough_memory(page_num, length);
00475 }
00476 
00477 void PDFController::clear_cached_bitmaps()
00478 {
00479     pages_cache.clear_cached_bitmaps();
00480 }
00481 
00482 void PDFController::update_memory_usage(const int length)
00483 {
00484     pages_cache.update_mem_usage(length);
00485 }
00486 
00487 bool PDFController::get_page_crop_width(const string &anchor, double &width)
00488 {
00489     int page_number = get_page_number_of_anchor(anchor);
00490 
00491     if (page_number > 0 && page_number <= static_cast<int>(page_count()))
00492     {
00493         width = get_page_crop_width(page_number);
00494         return true;
00495     }
00496 
00497     return false;
00498 }
00499 
00500 bool PDFController::get_page_crop_height(const string &anchor, double &height)
00501 {
00502     int page_number = get_page_number_of_anchor(anchor);
00503 
00504     if (page_number > 0 && page_number <= static_cast<int>(page_count()))
00505     {
00506         height = get_page_crop_height(page_number);
00507         return true;
00508     }
00509 
00510     return false;
00511 }
00512 
00513 PluginRotationDegree PDFController::get_page_original_rotation(const int page_num)
00514 {
00515     PluginRotationDegree rotation = Clockwise_Degrees_0;
00516 
00517     switch( pdf_doc->getPageRotate(page_num) )
00518     {
00519         case 90:
00520             rotation = Clockwise_Degrees_90;
00521             break;
00522         case 180:
00523             rotation = Clockwise_Degrees_180;
00524             break;
00525         case 270:
00526             rotation = Clockwise_Degrees_270;
00527             break;
00528         default:
00529             ; //ignore
00530     }
00531 
00532     return rotation;
00533 }
00534 
00535 PluginRotationDegree PDFController::get_page_original_rotation(const string &anchor)
00536 {
00537     int page_number = get_page_number_of_anchor(anchor);
00538 
00539     if (page_number <= 0 || page_number > static_cast<int>(page_count()))
00540     {
00541         // invalid page number
00542         return Clockwise_Degrees_0;
00543     }
00544 
00545     return get_page_original_rotation(page_number);
00546 }
00547 
00548 bool PDFController::is_anchor_in_current_document(const string &anchor)
00549 {
00550     PDFAnchor anchor_param(anchor);
00551 
00552     // check the file name
00553     if (!anchor_param.file_name.empty() &&
00554         anchor_param.file_name != name())
00555     {
00556         return false;
00557     }
00558 
00559     // check the page number
00560     if (anchor_param.page_num <= 0 ||
00561         anchor_param.page_num > static_cast<int>(page_count()))
00562     {
00563         return false;
00564     }
00565 
00566     return true;
00567 }
00568 
00569 bool PDFController::get_bounding_rectangles(const string &start_anchor
00570                                           , const string &end_anchor
00571                                           , PDFRectangles &rects)
00572 {
00573     int page_num = get_page_number_of_anchor(start_anchor);
00574 
00575     //Get the rendered page
00576     PagePtr page = get_page(page_num);
00577     if (!page)
00578     {
00579         return false;
00580     }
00581 
00582     return page->get_bounding_rectangles(start_anchor, end_anchor, rects);
00583 }
00584 
00585 bool PDFController::set_memory_limit(const unsigned int bytes)
00586 {
00587     // clear all of the render tasks related to this document
00588     PDFLibrary::instance().thread_cancel_render_tasks(this);
00589 
00590     int size = static_cast<int>(bytes >> 1);
00591     LOGPRINTF("Set memory:%d to document:%s", size, file_name.c_str());
00592     return pages_cache.reset(size);
00593 }
00594 
00595 unsigned int PDFController::get_memory_limit()
00596 {
00597     return pages_cache.size();
00598 }
00599 
00600 } //namespace pdf
00601 
00602 
Generated by  doxygen 1.6.2-20100208