pdf_anchor.cpp

Go to the documentation of this file.
00001 /*
00002  * File Name: pdf_anchor.cpp
00003  */
00004 
00005 /*
00006  * This file is part of uds-plugin-pdf.
00007  *
00008  * uds-plugin-pdf is free software: you can redistribute it and/or modify
00009  * it under the terms of the GNU General Public License as published by
00010  * the Free Software Foundation, either version 2 of the License, or
00011  * (at your option) any later version.
00012  *
00013  * uds-plugin-pdf is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program. If not, see <http://www.gnu.org/licenses/>.
00020  */
00021 
00022 /**
00023  * Copyright (C) 2008 iRex Technologies B.V.
00024  * All rights reserved.
00025  */
00026 
00027 #include "pdf_anchor.h"
00028 
00029 namespace pdf
00030 {
// Pieces of the textual anchor format produced by pack() and read by parse().
// The misspelt macro names (SEPERATOR, NUMER) are kept as-is because the rest
// of this file refers to them.
// Note: macros are not scoped by the enclosing namespace.

// Character between two sections of an anchor.
#define SECTION_SEPERATOR         G_DIR_SEPARATOR
// Character between a section's prefix and its value.
#define EVALUATE_SEPERATOR        ':'
// End-of-string marker used while scanning an anchor.
#define TERMINATION_FLAG          '\0'

// First section of every packed anchor.
#define PDF_ANCHOR_HEADER         "pdf:"

// Section prefixes, one per anchor field.
#define PDF_PAGE_NUMER_PREFIX     "page"
#define PDF_WORD_PREFIX           "word"
#define PDF_CHAR_PREFIX           "char"
#define PDF_HYPERLINK_PREFIX      "link"
#define PDF_TOC_INDEX_PREFIX      "toc"
#define PDF_FILE_NAME_PREFIX      "file"
00041 
00042 PDFAnchor::PDFAnchor()
00043 {
00044     reset();
00045 }
00046 
00047 PDFAnchor::PDFAnchor(const string &anchor)
00048 {
00049     reset();
00050     parse(anchor);
00051     anchor_str = anchor;
00052 }
00053 
00054 PDFAnchor::~PDFAnchor()
00055 {
00056 }
00057 
00058 void PDFAnchor::reset()
00059 {
00060     page_num = 0;
00061     word_num = -1;
00062     char_idx = -1;
00063     link_idx = -1;
00064     toc_idx  = -1;
00065     file_name.clear();
00066 }
00067 
00068 bool PDFAnchor::operator == (const PDFAnchor &right) const
00069 {
00070     return ((this->page_num == right.page_num)
00071         &&(this->word_num == right.word_num)
00072         &&(this->char_idx == right.char_idx)
00073         &&(this->link_idx == right.link_idx)
00074         &&(this->toc_idx == right.toc_idx)
00075         &&(this->file_name == right.file_name));
00076 }
00077 
00078 bool PDFAnchor::operator < (const PDFAnchor &right) const
00079 {
00080     if (this->page_num != right.page_num)
00081     {
00082         return (this->page_num < right.page_num);
00083     }
00084 
00085     if (this->word_num != right.word_num)
00086     {
00087         return (this->word_num < right.word_num);
00088     }
00089 
00090     if (this->char_idx != right.char_idx)
00091     {
00092         return (this->char_idx < right.char_idx);
00093     }
00094 
00095     if (this->link_idx != right.link_idx)
00096     {
00097         return (this->link_idx < right.link_idx);
00098     }
00099 
00100     if (this->toc_idx != right.toc_idx)
00101     {
00102         return (this->toc_idx < right.toc_idx);
00103     }
00104 
00105     return false;
00106 }
00107 
00108 bool PDFAnchor::operator > (const PDFAnchor &right) const
00109 {
00110     if (this->page_num != right.page_num)
00111     {
00112         return (this->page_num > right.page_num);
00113     }
00114     
00115     if (this->word_num != right.word_num)
00116     {
00117         return (this->word_num > right.word_num);
00118     }
00119 
00120     if (this->char_idx != right.char_idx)
00121     {
00122         return (this->char_idx > right.char_idx);
00123     }
00124 
00125     if (this->link_idx != right.link_idx)
00126     {
00127         return (this->link_idx > right.link_idx);
00128     }
00129 
00130     if (this->toc_idx != right.toc_idx)
00131     {
00132         return (this->toc_idx > right.toc_idx);
00133     }
00134 
00135     return false;
00136 }
00137 
00138 const string& PDFAnchor::get_string()
00139 {
00140     // TODO. check the validation of anchor string to avoid redundant packing.
00141     pack(anchor_str);
00142     return anchor_str;
00143 }
00144 
00145 int PDFAnchor::compare(const PDFAnchor &a1, const PDFAnchor &a2)
00146 {
00147     //if (a1.file_name != a2.file_name)
00148     //{
00149     //    // return if they belog to different file
00150     //    return ANCHOR_COMPARE_ERROR;
00151     //}
00152 
00153     if (a1 == a2)
00154     {
00155         return 0;
00156     }
00157 
00158     if (a1 < a2)
00159     {
00160         return -1;
00161     }
00162 
00163     return 1;
00164 }
00165 
00166 bool PDFAnchor::parse(const string &anchor)
00167 {
00168     size_t pos = anchor.find(PDF_FILE_NAME_PREFIX);
00169     std::string file_part;
00170     std::string front_part;
00171     if (pos != anchor.npos)
00172     {
00173         file_part  = anchor.substr(pos);
00174         front_part = anchor.substr(0, pos - 1);
00175     }
00176     else
00177     {
00178         front_part = anchor;
00179     }
00180     const char *pchar = front_part.c_str();
00181 
00182     bool ret = false;
00183     // parse the sections
00184     stringlist arglist;
00185     string     argument;
00186     while(!ret)
00187     {
00188         switch( *pchar )
00189         {
00190             case SECTION_SEPERATOR:
00191                 {
00192                     arglist.push_back(argument);
00193                     argument.clear();
00194                     pchar++;
00195                 }
00196                 break;
00197             case TERMINATION_FLAG:
00198                 {
00199                     arglist.push_back(argument);
00200                     ret = true;
00201                 }
00202                 break;
00203             default:
00204                 {
00205                     argument.push_back(*pchar++);
00206                 }
00207                 break;
00208         }
00209     }
00210 
00211     for(size_t i = 1; i < arglist.size(); ++i)
00212     {
00213         if (get_prefix(arglist[i]) == PDF_PAGE_NUMER_PREFIX)
00214         {
00215             page_num = get_num(arglist[i]);
00216         }
00217         else if (get_prefix(arglist[i]) == PDF_WORD_PREFIX)
00218         {
00219             word_num = get_num(arglist[i]);
00220         }
00221         else if (get_prefix(arglist[i]) == PDF_CHAR_PREFIX)
00222         {
00223             char_idx = get_num(arglist[i]);
00224         }
00225         else if (get_prefix(arglist[i]) == PDF_HYPERLINK_PREFIX)
00226         {
00227             link_idx = get_num(arglist[i]);
00228         }
00229         else if (get_prefix(arglist[i]) == PDF_TOC_INDEX_PREFIX)
00230         {
00231             toc_idx = get_num(arglist[i]);
00232         }
00233     }
00234 
00235     if (!file_part.empty())
00236     {
00237         file_name = get_postfix(file_part);
00238     }
00239     return true;
00240 }
00241 
00242 bool PDFAnchor::pack(string &anchor)
00243 {
00244     //if (page_num < 0)
00245     //{
00246     //    LOGPRINTF("Cannot pack the anchor with invalid page number!");
00247     //    return false;
00248     //}
00249     anchor = PDF_ANCHOR_HEADER;
00250     anchor += SECTION_SEPERATOR;
00251     anchor += PDF_PAGE_NUMER_PREFIX;
00252     anchor += EVALUATE_SEPERATOR;
00253     char buf[32];
00254     memset(buf, 0, 32);
00255     pdf_printf(buf, "%d", page_num);
00256     anchor += buf;
00257     
00258     if (word_num >= 0)
00259     {
00260         anchor += SECTION_SEPERATOR;
00261         anchor += PDF_WORD_PREFIX;
00262         anchor += EVALUATE_SEPERATOR;
00263         memset(buf, 0, 32);
00264         pdf_printf(buf, "%d", word_num);
00265         anchor += buf;
00266     }
00267     
00268     if (char_idx >= 0)
00269     {
00270         anchor += SECTION_SEPERATOR;
00271         anchor += PDF_CHAR_PREFIX;
00272         anchor += EVALUATE_SEPERATOR;
00273         memset(buf, 0, 32);
00274         pdf_printf(buf, "%d", char_idx);
00275         anchor += buf;
00276     }
00277 
00278     if (link_idx >= 0)
00279     {
00280         anchor += SECTION_SEPERATOR;
00281         anchor += PDF_HYPERLINK_PREFIX;
00282         anchor += EVALUATE_SEPERATOR;
00283         memset(buf, 0, 32);
00284         pdf_printf(buf, "%d", link_idx);
00285         anchor += buf;
00286     }
00287 
00288     if (toc_idx >= 0)
00289     {
00290         anchor += SECTION_SEPERATOR;
00291         anchor += PDF_TOC_INDEX_PREFIX;
00292         anchor += EVALUATE_SEPERATOR;
00293         memset(buf, 0, 32);
00294         pdf_printf(buf, "%d", toc_idx);
00295         anchor += buf;
00296     }
00297 
00298     // add the file name into anchor
00299     // this is not a mandatory option
00300     if (!file_name.empty())
00301     {
00302         anchor += SECTION_SEPERATOR;
00303         anchor += PDF_FILE_NAME_PREFIX;
00304         anchor += EVALUATE_SEPERATOR;
00305         anchor += file_name;
00306     }
00307 
00308     return true;
00309 }
00310 
00311 void PDFAnchor::set_end_anchor()
00312 {
00313     string name = file_name;
00314     reset();
00315     file_name = name;
00316     page_num  = 0;
00317 }
00318 
00319 bool PDFAnchor::is_end_anchor() const
00320 {
00321     return (page_num <= 0);
00322 }
00323 
00324 int PDFAnchor::get_num(const string &page_sub_str)
00325 {
00326     size_t idx = page_sub_str.find(EVALUATE_SEPERATOR);
00327 
00328     if (idx != page_sub_str.npos)
00329     {
00330         string num_str = page_sub_str.substr(idx + 1);
00331         return atoi(num_str.c_str());
00332     }
00333         
00334     return 0;
00335 }
00336 
00337 const string PDFAnchor::get_prefix(const string &str)
00338 {
00339     return str.substr(0, str.find(EVALUATE_SEPERATOR));
00340 }
00341 
00342 const string PDFAnchor::get_postfix(const string &str)
00343 {
00344     return str.substr(str.find(EVALUATE_SEPERATOR) + 1);
00345 }
00346 
00347 void PDFAnchor::validate_for_getting_text(void)
00348 {
00349     if (page_num)
00350     {
00351         if (word_num == -1)
00352         {
00353             word_num = 0;
00354         }
00355 
00356         if (char_idx == -1)
00357         {
00358             char_idx = 0;
00359         }
00360     }
00361 }
00362 }// namespace pdf
00363 
Generated by  doxygen 1.6.2-20100208