#include <CharDistribution.h>
Public Member Functions | |
CharDistributionAnalysis () | |
virtual | ~CharDistributionAnalysis () |
void | HandleData (const char *aBuf, PRUint32 aLen) |
void | HandleOneChar (const char *aStr, PRUint32 aCharLen) |
float | GetConfidence () |
void | Reset (void) |
void | SetOpion () |
PRBool | GotEnoughData () |
Protected Member Functions | |
virtual PRInt32 | GetOrder (const char *str) |
Protected Attributes | |
PRBool | mDone |
PRUint32 | mFreqChars |
PRUint32 | mTotalChars |
const PRInt16 * | mCharToFreqOrder |
PRUint32 | mTableSize |
float | mTypicalDistributionRatio |
Definition at line 45 of file CharDistribution.h.
CharDistributionAnalysis::CharDistributionAnalysis | ( | ) | [inline] |
Definition at line 48 of file CharDistribution.h.
References Reset().
00048 {Reset();}
virtual CharDistributionAnalysis::~CharDistributionAnalysis | ( | ) | [inline, virtual] |
Definition at line 49 of file CharDistribution.h.
float CharDistributionAnalysis::GetConfidence | ( | ) |
Definition at line 52 of file CharDistribution.cpp.
References mFreqChars, MINIMUM_DATA_THRESHOLD, mTotalChars, mTypicalDistributionRatio, SURE_NO, and SURE_YES.
Referenced by nsSJISProber::GetConfidence(), nsGB18030Prober::GetConfidence(), nsEUCTWProber::GetConfidence(), nsEUCKRProber::GetConfidence(), nsEUCJPProber::GetConfidence(), and nsBig5Prober::GetConfidence().
00053 {
00054   //if we didn't receive any character in our consideration range, or the
00055   // number of frequent characters is below the minimum threshold, return
00056   // negative answer
00057   if (mTotalChars <= 0 || mFreqChars <= MINIMUM_DATA_THRESHOLD)
00058     return SURE_NO;
00059
00060   if (mTotalChars != mFreqChars) {
00061     float r = mFreqChars / ((mTotalChars - mFreqChars) * mTypicalDistributionRatio);
00062
00063     if (r < SURE_YES)
00064       return r;
00065   }
00066   //normalize confidence, (we don't want to be 100% sure)
00067   return SURE_YES;
00068 }
virtual PRInt32 CharDistributionAnalysis::GetOrder | ( | const char * | str | ) | [inline, protected, virtual] |
Reimplemented in EUCTWDistributionAnalysis, EUCKRDistributionAnalysis, GB2312DistributionAnalysis, Big5DistributionAnalysis, SJISDistributionAnalysis, and EUCJPDistributionAnalysis.
Definition at line 97 of file CharDistribution.h.
Referenced by HandleOneChar().
PRBool CharDistributionAnalysis::GotEnoughData | ( | ) | [inline] |
Definition at line 91 of file CharDistribution.h.
References ENOUGH_DATA_THRESHOLD, and mTotalChars.
Referenced by nsGB18030Prober::HandleData(), nsEUCTWProber::HandleData(), nsEUCKRProber::HandleData(), and nsBig5Prober::HandleData().
00091 {return mTotalChars > ENOUGH_DATA_THRESHOLD;};
void CharDistributionAnalysis::HandleData | ( | const char * | aBuf, | |
PRUint32 | aLen | |||
) | [inline] |
Definition at line 52 of file CharDistribution.h.
void CharDistributionAnalysis::HandleOneChar | ( | const char * | aStr, | |
PRUint32 | aCharLen | |||
) | [inline] |
Definition at line 55 of file CharDistribution.h.
References GetOrder(), mCharToFreqOrder, mFreqChars, mTableSize, and mTotalChars.
Referenced by nsSJISProber::HandleData(), nsGB18030Prober::HandleData(), nsEUCTWProber::HandleData(), nsEUCKRProber::HandleData(), nsEUCJPProber::HandleData(), and nsBig5Prober::HandleData().
00056 {
00057   PRInt32 order;
00058
00059   //we only care about 2-bytes character in our distribution analysis
00060   order = (aCharLen == 2) ? GetOrder(aStr) : -1;
00061
00062   if (order >= 0)
00063   {
00064     mTotalChars++;
00065     //order is valid
00066     if ((PRUint32)order < mTableSize)
00067     {
00068       if (512 > mCharToFreqOrder[order])
00069         mFreqChars++;
00070     }
00071   }
00072 };
void CharDistributionAnalysis::Reset | ( | void | ) | [inline] |
Definition at line 78 of file CharDistribution.h.
References mDone, mFreqChars, mTotalChars, and PR_FALSE.
Referenced by CharDistributionAnalysis(), nsSJISProber::Reset(), nsGB18030Prober::Reset(), nsEUCTWProber::Reset(), nsEUCKRProber::Reset(), nsEUCJPProber::Reset(), and nsBig5Prober::Reset().
00079 {
00080   mDone = PR_FALSE;
00081   mTotalChars = 0;
00082   mFreqChars = 0;
00083 };
void CharDistributionAnalysis::SetOpion | ( | ) | [inline] |
Definition at line 87 of file CharDistribution.h.
const PRInt16* CharDistributionAnalysis::mCharToFreqOrder [protected] |
Definition at line 109 of file CharDistribution.h.
Referenced by Big5DistributionAnalysis::Big5DistributionAnalysis(), EUCJPDistributionAnalysis::EUCJPDistributionAnalysis(), EUCKRDistributionAnalysis::EUCKRDistributionAnalysis(), EUCTWDistributionAnalysis::EUCTWDistributionAnalysis(), GB2312DistributionAnalysis::GB2312DistributionAnalysis(), HandleOneChar(), and SJISDistributionAnalysis::SJISDistributionAnalysis().
PRBool CharDistributionAnalysis::mDone [protected] |
Definition at line 97 of file CharDistribution.h.
Referenced by Reset().
PRUint32 CharDistributionAnalysis::mFreqChars [protected] |
Definition at line 103 of file CharDistribution.h.
Referenced by GetConfidence(), HandleOneChar(), and Reset().
PRUint32 CharDistributionAnalysis::mTableSize [protected] |
Definition at line 112 of file CharDistribution.h.
Referenced by Big5DistributionAnalysis::Big5DistributionAnalysis(), EUCJPDistributionAnalysis::EUCJPDistributionAnalysis(), EUCKRDistributionAnalysis::EUCKRDistributionAnalysis(), EUCTWDistributionAnalysis::EUCTWDistributionAnalysis(), GB2312DistributionAnalysis::GB2312DistributionAnalysis(), HandleOneChar(), and SJISDistributionAnalysis::SJISDistributionAnalysis().
PRUint32 CharDistributionAnalysis::mTotalChars [protected] |
Definition at line 106 of file CharDistribution.h.
Referenced by GetConfidence(), GotEnoughData(), HandleOneChar(), and Reset().
float CharDistributionAnalysis::mTypicalDistributionRatio [protected] |
Definition at line 116 of file CharDistribution.h.
Referenced by Big5DistributionAnalysis::Big5DistributionAnalysis(), EUCJPDistributionAnalysis::EUCJPDistributionAnalysis(), EUCKRDistributionAnalysis::EUCKRDistributionAnalysis(), EUCTWDistributionAnalysis::EUCTWDistributionAnalysis(), GB2312DistributionAnalysis::GB2312DistributionAnalysis(), GetConfidence(), and SJISDistributionAnalysis::SJISDistributionAnalysis().