pdf_anchor.cpp
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "pdf_anchor.h"
00028
00029 namespace pdf
00030 {
// Building blocks of the textual anchor format produced by PDFAnchor::pack()
// and consumed by PDFAnchor::parse():
//
//   pdf:<sep>page:<n>[<sep>word:<n>][<sep>char:<n>][<sep>link:<n>][<sep>toc:<n>][<sep>file:<name>]
//
// The macro names (including their historical spelling) are referenced by the
// rest of this file and are therefore kept unchanged.

// Character placed between two sections of an anchor string.
// NOTE(review): G_DIR_SEPARATOR is not defined in this file; presumably it is
// GLib's directory separator character -- confirm against the included headers.
#define SECTION_SEPERATOR G_DIR_SEPARATOR
// Character placed between a section's key and its value.
#define EVALUATE_SEPERATOR ':'
// String terminator; marks the end of the input while scanning an anchor.
#define TERMINATION_FLAG '\0'
// Leading tag that every packed anchor starts with.
#define PDF_ANCHOR_HEADER "pdf:"
// Section keys, one per PDFAnchor field.
#define PDF_PAGE_NUMER_PREFIX "page"
#define PDF_WORD_PREFIX "word"
#define PDF_CHAR_PREFIX "char"
#define PDF_HYPERLINK_PREFIX "link"
#define PDF_TOC_INDEX_PREFIX "toc"
#define PDF_FILE_NAME_PREFIX "file"
00041
00042 PDFAnchor::PDFAnchor()
00043 {
00044 reset();
00045 }
00046
00047 PDFAnchor::PDFAnchor(const string &anchor)
00048 {
00049 reset();
00050 parse(anchor);
00051 anchor_str = anchor;
00052 }
00053
00054 PDFAnchor::~PDFAnchor()
00055 {
00056 }
00057
00058 void PDFAnchor::reset()
00059 {
00060 page_num = 0;
00061 word_num = -1;
00062 char_idx = -1;
00063 link_idx = -1;
00064 toc_idx = -1;
00065 file_name.clear();
00066 }
00067
00068 bool PDFAnchor::operator == (const PDFAnchor &right) const
00069 {
00070 return ((this->page_num == right.page_num)
00071 &&(this->word_num == right.word_num)
00072 &&(this->char_idx == right.char_idx)
00073 &&(this->link_idx == right.link_idx)
00074 &&(this->toc_idx == right.toc_idx)
00075 &&(this->file_name == right.file_name));
00076 }
00077
00078 bool PDFAnchor::operator < (const PDFAnchor &right) const
00079 {
00080 if (this->page_num != right.page_num)
00081 {
00082 return (this->page_num < right.page_num);
00083 }
00084
00085 if (this->word_num != right.word_num)
00086 {
00087 return (this->word_num < right.word_num);
00088 }
00089
00090 if (this->char_idx != right.char_idx)
00091 {
00092 return (this->char_idx < right.char_idx);
00093 }
00094
00095 if (this->link_idx != right.link_idx)
00096 {
00097 return (this->link_idx < right.link_idx);
00098 }
00099
00100 if (this->toc_idx != right.toc_idx)
00101 {
00102 return (this->toc_idx < right.toc_idx);
00103 }
00104
00105 return false;
00106 }
00107
00108 bool PDFAnchor::operator > (const PDFAnchor &right) const
00109 {
00110 if (this->page_num != right.page_num)
00111 {
00112 return (this->page_num > right.page_num);
00113 }
00114
00115 if (this->word_num != right.word_num)
00116 {
00117 return (this->word_num > right.word_num);
00118 }
00119
00120 if (this->char_idx != right.char_idx)
00121 {
00122 return (this->char_idx > right.char_idx);
00123 }
00124
00125 if (this->link_idx != right.link_idx)
00126 {
00127 return (this->link_idx > right.link_idx);
00128 }
00129
00130 if (this->toc_idx != right.toc_idx)
00131 {
00132 return (this->toc_idx > right.toc_idx);
00133 }
00134
00135 return false;
00136 }
00137
00138 const string& PDFAnchor::get_string()
00139 {
00140
00141 pack(anchor_str);
00142 return anchor_str;
00143 }
00144
00145 int PDFAnchor::compare(const PDFAnchor &a1, const PDFAnchor &a2)
00146 {
00147
00148
00149
00150
00151
00152
00153 if (a1 == a2)
00154 {
00155 return 0;
00156 }
00157
00158 if (a1 < a2)
00159 {
00160 return -1;
00161 }
00162
00163 return 1;
00164 }
00165
00166 bool PDFAnchor::parse(const string &anchor)
00167 {
00168 size_t pos = anchor.find(PDF_FILE_NAME_PREFIX);
00169 std::string file_part;
00170 std::string front_part;
00171 if (pos != anchor.npos)
00172 {
00173 file_part = anchor.substr(pos);
00174 front_part = anchor.substr(0, pos - 1);
00175 }
00176 else
00177 {
00178 front_part = anchor;
00179 }
00180 const char *pchar = front_part.c_str();
00181
00182 bool ret = false;
00183
00184 stringlist arglist;
00185 string argument;
00186 while(!ret)
00187 {
00188 switch( *pchar )
00189 {
00190 case SECTION_SEPERATOR:
00191 {
00192 arglist.push_back(argument);
00193 argument.clear();
00194 pchar++;
00195 }
00196 break;
00197 case TERMINATION_FLAG:
00198 {
00199 arglist.push_back(argument);
00200 ret = true;
00201 }
00202 break;
00203 default:
00204 {
00205 argument.push_back(*pchar++);
00206 }
00207 break;
00208 }
00209 }
00210
00211 for(size_t i = 1; i < arglist.size(); ++i)
00212 {
00213 if (get_prefix(arglist[i]) == PDF_PAGE_NUMER_PREFIX)
00214 {
00215 page_num = get_num(arglist[i]);
00216 }
00217 else if (get_prefix(arglist[i]) == PDF_WORD_PREFIX)
00218 {
00219 word_num = get_num(arglist[i]);
00220 }
00221 else if (get_prefix(arglist[i]) == PDF_CHAR_PREFIX)
00222 {
00223 char_idx = get_num(arglist[i]);
00224 }
00225 else if (get_prefix(arglist[i]) == PDF_HYPERLINK_PREFIX)
00226 {
00227 link_idx = get_num(arglist[i]);
00228 }
00229 else if (get_prefix(arglist[i]) == PDF_TOC_INDEX_PREFIX)
00230 {
00231 toc_idx = get_num(arglist[i]);
00232 }
00233 }
00234
00235 if (!file_part.empty())
00236 {
00237 file_name = get_postfix(file_part);
00238 }
00239 return true;
00240 }
00241
00242 bool PDFAnchor::pack(string &anchor)
00243 {
00244
00245
00246
00247
00248
00249 anchor = PDF_ANCHOR_HEADER;
00250 anchor += SECTION_SEPERATOR;
00251 anchor += PDF_PAGE_NUMER_PREFIX;
00252 anchor += EVALUATE_SEPERATOR;
00253 char buf[32];
00254 memset(buf, 0, 32);
00255 pdf_printf(buf, "%d", page_num);
00256 anchor += buf;
00257
00258 if (word_num >= 0)
00259 {
00260 anchor += SECTION_SEPERATOR;
00261 anchor += PDF_WORD_PREFIX;
00262 anchor += EVALUATE_SEPERATOR;
00263 memset(buf, 0, 32);
00264 pdf_printf(buf, "%d", word_num);
00265 anchor += buf;
00266 }
00267
00268 if (char_idx >= 0)
00269 {
00270 anchor += SECTION_SEPERATOR;
00271 anchor += PDF_CHAR_PREFIX;
00272 anchor += EVALUATE_SEPERATOR;
00273 memset(buf, 0, 32);
00274 pdf_printf(buf, "%d", char_idx);
00275 anchor += buf;
00276 }
00277
00278 if (link_idx >= 0)
00279 {
00280 anchor += SECTION_SEPERATOR;
00281 anchor += PDF_HYPERLINK_PREFIX;
00282 anchor += EVALUATE_SEPERATOR;
00283 memset(buf, 0, 32);
00284 pdf_printf(buf, "%d", link_idx);
00285 anchor += buf;
00286 }
00287
00288 if (toc_idx >= 0)
00289 {
00290 anchor += SECTION_SEPERATOR;
00291 anchor += PDF_TOC_INDEX_PREFIX;
00292 anchor += EVALUATE_SEPERATOR;
00293 memset(buf, 0, 32);
00294 pdf_printf(buf, "%d", toc_idx);
00295 anchor += buf;
00296 }
00297
00298
00299
00300 if (!file_name.empty())
00301 {
00302 anchor += SECTION_SEPERATOR;
00303 anchor += PDF_FILE_NAME_PREFIX;
00304 anchor += EVALUATE_SEPERATOR;
00305 anchor += file_name;
00306 }
00307
00308 return true;
00309 }
00310
00311 void PDFAnchor::set_end_anchor()
00312 {
00313 string name = file_name;
00314 reset();
00315 file_name = name;
00316 page_num = 0;
00317 }
00318
00319 bool PDFAnchor::is_end_anchor() const
00320 {
00321 return (page_num <= 0);
00322 }
00323
00324 int PDFAnchor::get_num(const string &page_sub_str)
00325 {
00326 size_t idx = page_sub_str.find(EVALUATE_SEPERATOR);
00327
00328 if (idx != page_sub_str.npos)
00329 {
00330 string num_str = page_sub_str.substr(idx + 1);
00331 return atoi(num_str.c_str());
00332 }
00333
00334 return 0;
00335 }
00336
00337 const string PDFAnchor::get_prefix(const string &str)
00338 {
00339 return str.substr(0, str.find(EVALUATE_SEPERATOR));
00340 }
00341
00342 const string PDFAnchor::get_postfix(const string &str)
00343 {
00344 return str.substr(str.find(EVALUATE_SEPERATOR) + 1);
00345 }
00346
00347 void PDFAnchor::validate_for_getting_text(void)
00348 {
00349 if (page_num)
00350 {
00351 if (word_num == -1)
00352 {
00353 word_num = 0;
00354 }
00355
00356 if (char_idx == -1)
00357 {
00358 char_idx = 0;
00359 }
00360 }
00361 }
00362 }
00363