#include <nsHebrewProber.h>
Public Member Functions | |
nsHebrewProber (void) | |
virtual | ~nsHebrewProber (void) |
virtual nsProbingState | HandleData (const char *aBuf, PRUint32 aLen) |
virtual const char * | GetCharSetName () |
virtual void | Reset (void) |
virtual nsProbingState | GetState (void) |
virtual float | GetConfidence (void) |
virtual void | SetOpion () |
void | SetModelProbers (nsCharSetProber *logicalPrb, nsCharSetProber *visualPrb) |
Static Protected Member Functions | |
static PRBool | isFinal (char c) |
static PRBool | isNonFinal (char c) |
Protected Attributes | |
PRInt32 | mFinalCharLogicalScore |
PRInt32 | mFinalCharVisualScore |
char | mPrev |
char | mBeforePrev |
nsCharSetProber * | mLogicalProb |
nsCharSetProber * | mVisualProb |
Definition at line 45 of file nsHebrewProber.h.
nsHebrewProber::nsHebrewProber(void) [inline]
Definition at line 44 of file nsHebrewProber.h.
Referenced by ~nsHebrewProber().
00045 : public nsCharSetProber
virtual nsHebrewProber::~nsHebrewProber(void) [inline, virtual]
Definition at line 46 of file nsHebrewProber.h.
References GetCharSetName(), HandleData(), mLogicalProb, mVisualProb, nsHebrewProber(), and Reset().
const char * nsHebrewProber::GetCharSetName() [virtual]
Implements nsCharSetProber.
Definition at line 145 of file nsHebrewProber.cpp.
References nsCharSetProber::GetConfidence(), LOGICAL_HEBREW_NAME, mFinalCharLogicalScore, mFinalCharVisualScore, MIN_FINAL_CHAR_DISTANCE, MIN_MODEL_DISTANCE, mLogicalProb, mVisualProb, and VISUAL_HEBREW_NAME.
Referenced by ~nsHebrewProber().
00146 { 00147 // If the final letter score distance is dominant enough, rely on it. 00148 PRInt32 finalsub = mFinalCharLogicalScore - mFinalCharVisualScore; 00149 if (finalsub >= MIN_FINAL_CHAR_DISTANCE) 00150 return LOGICAL_HEBREW_NAME; 00151 if (finalsub <= -(MIN_FINAL_CHAR_DISTANCE)) 00152 return VISUAL_HEBREW_NAME; 00153 00154 // It's not dominant enough, try to rely on the model scores instead. 00155 float modelsub = mLogicalProb->GetConfidence() - mVisualProb->GetConfidence(); 00156 if (modelsub > MIN_MODEL_DISTANCE) 00157 return LOGICAL_HEBREW_NAME; 00158 if (modelsub < -(MIN_MODEL_DISTANCE)) 00159 return VISUAL_HEBREW_NAME; 00160 00161 // Still no good, back to final letter distance, maybe it'll save the day. 00162 if (finalsub < 0) 00163 return VISUAL_HEBREW_NAME; 00164 00165 // (finalsub > 0 - Logical) or (don't know what to do) default to Logical. 00166 return LOGICAL_HEBREW_NAME; 00167 }
virtual float nsHebrewProber::GetConfidence(void) [inline, virtual]
Implements nsCharSetProber.
Definition at line 53 of file nsHebrewProber.h.
nsProbingState nsHebrewProber::GetState(void) [virtual]
Implements nsCharSetProber.
Definition at line 181 of file nsHebrewProber.cpp.
References eDetecting, eNotMe, nsCharSetProber::GetState(), mLogicalProb, and mVisualProb.
Referenced by HandleData().
00182 { 00183 // Remain active as long as any of the model probers are active. 00184 if ((mLogicalProb->GetState() == eNotMe) && (mVisualProb->GetState() == eNotMe)) 00185 return eNotMe; 00186 return eDetecting; 00187 }
nsProbingState nsHebrewProber::HandleData(const char *aBuf, PRUint32 aLen) [virtual]
HandleData performs the final-letter analysis used for the logical-versus-visual decision. It looks for evidence that the received buffer is either logical Hebrew or visual Hebrew. The following cases are checked: 1) A word longer than 1 letter, ending with a final letter. This is an indication that the text is laid out "naturally", since the final letter really appears at the end. +1 for the logical score. 2) A word longer than 1 letter, ending with a non-final letter. In normal Hebrew, words ending with Kaf, Mem, Nun, Pe or Tsadi should not end with the non-final form of that letter. Exceptions to this rule are described in the comments of isNonFinal(). This is an indication that the text is laid out backwards. +1 for the visual score. 3) A word longer than 1 letter, starting with a final letter. Final letters should not appear at the beginning of a word. This is an indication that the text is laid out backwards. +1 for the visual score.
The visual score and logical score are accumulated throughout the text and are finally compared with each other in GetCharSetName(). Final letters in the middle of words are not checked, since that case indicates neither logical nor visual text.
The input buffer should contain no whitespace other than the space character (' ') and no low-ASCII punctuation marks.
Implements nsCharSetProber.
Definition at line 109 of file nsHebrewProber.cpp.
References eDetecting, eNotMe, GetState(), isFinal(), isNonFinal(), mBeforePrev, mFinalCharLogicalScore, mFinalCharVisualScore, and mPrev.
Referenced by ~nsHebrewProber().
00110 { 00111 // Both model probers say it's not them. No reason to continue. 00112 if (GetState() == eNotMe) 00113 return eNotMe; 00114 00115 const char *curPtr, *endPtr = aBuf+aLen; 00116 char cur; 00117 00118 for (curPtr = (char*)aBuf; curPtr < endPtr; ++curPtr) 00119 { 00120 cur = *curPtr; 00121 if (cur == ' ') // We stand on a space - a word just ended 00122 { 00123 if (mBeforePrev != ' ') // *(curPtr-2) was not a space so prev is not a 1 letter word 00124 { 00125 if (isFinal(mPrev)) // case (1) [-2:not space][-1:final letter][cur:space] 00126 ++mFinalCharLogicalScore; 00127 else if (isNonFinal(mPrev)) // case (2) [-2:not space][-1:Non-Final letter][cur:space] 00128 ++mFinalCharVisualScore; 00129 } 00130 } 00131 else // Not standing on a space 00132 { 00133 if ((mBeforePrev == ' ') && (isFinal(mPrev)) && (cur != ' ')) // case (3) [-2:space][-1:final letter][cur:not space] 00134 ++mFinalCharVisualScore; 00135 } 00136 mBeforePrev = mPrev; 00137 mPrev = cur; 00138 } 00139 00140 // Forever detecting, till the end or until both model probers return eNotMe (handled above). 00141 return eDetecting; 00142 }
PRBool nsHebrewProber::isFinal(char c) [static, protected]
Definition at line 64 of file nsHebrewProber.cpp.
References FINAL_KAF, FINAL_MEM, FINAL_NUN, FINAL_PE, and FINAL_TSADI.
Referenced by HandleData().
00065 { 00066 return ((c == FINAL_KAF) || (c == FINAL_MEM) || (c == FINAL_NUN) || (c == FINAL_PE) || (c == FINAL_TSADI)); 00067 }
PRBool nsHebrewProber::isNonFinal(char c) [static, protected]
Definition at line 69 of file nsHebrewProber.cpp.
References NORMAL_KAF, NORMAL_MEM, NORMAL_NUN, and NORMAL_PE.
Referenced by HandleData().
00070 { 00071 return ((c == NORMAL_KAF) || (c == NORMAL_MEM) || (c == NORMAL_NUN) || (c == NORMAL_PE)); 00072 // The normal Tsadi is not a good Non-Final letter due to words like 00073 // 'lechotet' (to chat) containing an apostrophe after the tsadi. This 00074 // apostrophe is converted to a space in FilterWithoutEnglishLetters causing 00075 // the Non-Final tsadi to appear at an end of a word even though this is not 00076 // the case in the original text. 00077 // The letters Pe and Kaf rarely display a related behavior of not being a 00078 // good Non-Final letter. Words like 'Pop', 'Winamp' and 'Mubarak' for 00079 // example legally end with a Non-Final Pe or Kaf. However, the benefit of 00080 // these letters as Non-Final letters outweighs the damage since these words 00081 // are quite rare. 00082 }
void nsHebrewProber::Reset(void) [virtual]
Implements nsCharSetProber.
Definition at line 170 of file nsHebrewProber.cpp.
References mBeforePrev, mFinalCharLogicalScore, mFinalCharVisualScore, and mPrev.
Referenced by ~nsHebrewProber().
00171 { 00172 mFinalCharLogicalScore = 0; 00173 mFinalCharVisualScore = 0; 00174 00175 // mPrev and mBeforePrev are initialized to space in order to simulate a word 00176 // delimiter at the beginning of the data 00177 mPrev = ' '; 00178 mBeforePrev = ' '; 00179 }
void nsHebrewProber::SetModelProbers(nsCharSetProber *logicalPrb, nsCharSetProber *visualPrb) [inline]
Definition at line 56 of file nsHebrewProber.h.
Referenced by nsSBCSGroupProber::nsSBCSGroupProber().
virtual void nsHebrewProber::SetOpion() [inline, virtual]
Implements nsCharSetProber.
Definition at line 54 of file nsHebrewProber.h.
char nsHebrewProber::mBeforePrev [protected] |
Definition at line 70 of file nsHebrewProber.h.
Referenced by HandleData(), and Reset().
PRInt32 nsHebrewProber::mFinalCharLogicalScore [protected] |
Definition at line 67 of file nsHebrewProber.h.
Referenced by GetCharSetName(), HandleData(), and Reset().
PRInt32 nsHebrewProber::mFinalCharVisualScore [protected] |
Definition at line 67 of file nsHebrewProber.h.
Referenced by GetCharSetName(), HandleData(), and Reset().
nsCharSetProber* nsHebrewProber::mLogicalProb [protected] |
Definition at line 73 of file nsHebrewProber.h.
Referenced by GetCharSetName(), GetState(), and ~nsHebrewProber().
char nsHebrewProber::mPrev [protected] |
Definition at line 70 of file nsHebrewProber.h.
Referenced by HandleData(), and Reset().
nsCharSetProber * nsHebrewProber::mVisualProb [protected] |
Definition at line 73 of file nsHebrewProber.h.
Referenced by GetCharSetName(), GetState(), and ~nsHebrewProber().