00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 00002 /* ***** BEGIN LICENSE BLOCK ***** 00003 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 00004 * 00005 * The contents of this file are subject to the Mozilla Public License Version 00006 * 1.1 (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * http://www.mozilla.org/MPL/ 00009 * 00010 * Software distributed under the License is distributed on an "AS IS" basis, 00011 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 00012 * for the specific language governing rights and limitations under the 00013 * License. 00014 * 00015 * The Original Code is Mozilla Communicator client code. 00016 * 00017 * The Initial Developer of the Original Code is 00018 * Netscape Communications Corporation. 00019 * Portions created by the Initial Developer are Copyright (C) 1998 00020 * the Initial Developer. All Rights Reserved. 00021 * 00022 * Contributor(s): 00023 * 00024 * Alternatively, the contents of this file may be used under the terms of 00025 * either the GNU General Public License Version 2 or later (the "GPL"), or 00026 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 00027 * in which case the provisions of the GPL or the LGPL are applicable instead 00028 * of those above. If you wish to allow use of your version of this file only 00029 * under the terms of either the GPL or the LGPL, and not to allow others to 00030 * use your version of this file under the terms of the MPL, indicate your 00031 * decision by deleting the provisions above and replace them with the notice 00032 * and other provisions required by the GPL or the LGPL. If you do not delete 00033 * the provisions above, a recipient may use your version of this file under 00034 * the terms of any one of the MPL, the GPL or the LGPL. 00035 * 00036 * ***** END LICENSE BLOCK ***** */ 00037 00038 #ifndef __JPCNTX_H__ 00039 #define __JPCNTX_H__ 00040 00041 #define NUM_OF_CATEGORY 6 00042 00043 #include "nscore.h" 00044 00045 #define ENOUGH_REL_THRESHOLD 100 00046 #define MAX_REL_THRESHOLD 1000 00047 00048 //hiragana frequency category table 00049 extern unsigned char jp2CharContext[83][83]; 00050 00051 class JapaneseContextAnalysis 00052 { 00053 public: 00054 JapaneseContextAnalysis() {Reset();}; 00055 virtual ~JapaneseContextAnalysis() {} 00056 00057 void HandleData(const char* aBuf, PRUint32 aLen); 00058 00059 void HandleOneChar(const char* aStr, PRUint32 aCharLen) 00060 { 00061 PRInt32 order; 00062 00063 //if we received enough data, stop here 00064 if (mTotalRel > MAX_REL_THRESHOLD) mDone = PR_TRUE; 00065 if (mDone) return; 00066 00067 //Only 2-bytes characters are of our interest 00068 order = (aCharLen == 2) ? GetOrder(aStr) : -1; 00069 if (order != -1 && mLastCharOrder != -1) 00070 { 00071 mTotalRel++; 00072 //count this sequence to its category counter 00073 mRelSample[jp2CharContext[mLastCharOrder][order]]++; 00074 } 00075 mLastCharOrder = order; 00076 }; 00077 00078 float GetConfidence(); 00079 void Reset(void); 00080 void SetOpion(){}; 00081 PRBool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;}; 00082 00083 protected: 00084 virtual PRInt32 GetOrder(const char* str, PRUint32 *charLen) = 0; 00085 virtual PRInt32 GetOrder(const char* str) = 0; 00086 00087 //category counters, each interger counts sequence in its category 00088 PRUint32 mRelSample[NUM_OF_CATEGORY]; 00089 00090 //total sequence received 00091 PRUint32 mTotalRel; 00092 00093 //The order of previous char 00094 PRInt32 mLastCharOrder; 00095 00096 //if last byte in current buffer is not the last byte of a character, we 00097 //need to know how many byte to skip in next buffer. 00098 PRUint32 mNeedToSkipCharNum; 00099 00100 //If this flag is set to PR_TRUE, detection is done and conclusion has been made 00101 PRBool mDone; 00102 }; 00103 00104 00105 class SJISContextAnalysis : public JapaneseContextAnalysis 00106 { 00107 //SJISContextAnalysis(){}; 00108 protected: 00109 PRInt32 GetOrder(const char* str, PRUint32 *charLen); 00110 00111 PRInt32 GetOrder(const char* str) 00112 { 00113 //We only interested in Hiragana, so first byte is '\202' 00114 if (*str == '\202' && 00115 (unsigned char)*(str+1) >= (unsigned char)0x9f && 00116 (unsigned char)*(str+1) <= (unsigned char)0xf1) 00117 return (unsigned char)*(str+1) - (unsigned char)0x9f; 00118 return -1; 00119 }; 00120 }; 00121 00122 class EUCJPContextAnalysis : public JapaneseContextAnalysis 00123 { 00124 protected: 00125 PRInt32 GetOrder(const char* str, PRUint32 *charLen); 00126 PRInt32 GetOrder(const char* str) 00127 //We only interested in Hiragana, so first byte is '\244' 00128 { 00129 if (*str == '\244' && 00130 (unsigned char)*(str+1) >= (unsigned char)0xa1 && 00131 (unsigned char)*(str+1) <= (unsigned char)0xf3) 00132 return (unsigned char)*(str+1) - (unsigned char)0xa1; 00133 return -1; 00134 }; 00135 }; 00136 00137 #endif /* __JPCNTX_H__ */ 00138