00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 00002 /* ***** BEGIN LICENSE BLOCK ***** 00003 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 00004 * 00005 * The contents of this file are subject to the Mozilla Public License Version 00006 * 1.1 (the "License"); you may not use this file except in compliance with 00007 * the License. You may obtain a copy of the License at 00008 * http://www.mozilla.org/MPL/ 00009 * 00010 * Software distributed under the License is distributed on an "AS IS" basis, 00011 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 00012 * for the specific language governing rights and limitations under the 00013 * License. 00014 * 00015 * The Original Code is Mozilla Universal charset detector code. 00016 * 00017 * The Initial Developer of the Original Code is 00018 * Netscape Communications Corporation. 00019 * Portions created by the Initial Developer are Copyright (C) 2001 00020 * the Initial Developer. All Rights Reserved. 00021 * 00022 * Contributor(s): 00023 * Shy Shalom <shooshX@gmail.com> 00024 * 00025 * Alternatively, the contents of this file may be used under the terms of 00026 * either the GNU General Public License Version 2 or later (the "GPL"), or 00027 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 00028 * in which case the provisions of the GPL or the LGPL are applicable instead 00029 * of those above. If you wish to allow use of your version of this file only 00030 * under the terms of either the GPL or the LGPL, and not to allow others to 00031 * use your version of this file under the terms of the MPL, indicate your 00032 * decision by deleting the provisions above and replace them with the notice 00033 * and other provisions required by the GPL or the LGPL. If you do not delete 00034 * the provisions above, a recipient may use your version of this file under 00035 * the terms of any one of the MPL, the GPL or the LGPL. 00036 * 00037 * ***** END LICENSE BLOCK ***** */ 00038 #ifndef nsCharSetProber_h__ 00039 #define nsCharSetProber_h__ 00040 00041 #include "nscore.h" 00042 00043 //#define DEBUG_chardet // Uncomment this for debug dump. 00044 00045 typedef enum { 00046 eDetecting = 0, //We are still detecting, no sure answer yet, but caller can ask for confidence. 00047 eFoundIt = 1, //That's a positive answer 00048 eNotMe = 2 //Negative answer 00049 } nsProbingState; 00050 00051 #define SHORTCUT_THRESHOLD (float)0.95 00052 00053 class nsCharSetProber { 00054 public: 00055 virtual ~nsCharSetProber() {}; 00056 virtual const char* GetCharSetName() = 0; 00057 virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen) = 0; 00058 virtual nsProbingState GetState(void) = 0; 00059 virtual void Reset(void) = 0; 00060 virtual float GetConfidence(void) = 0; 00061 virtual void SetOpion() = 0; 00062 00063 #ifdef DEBUG_chardet 00064 virtual void DumpStatus() {}; 00065 #endif 00066 00067 // Helper functions used in the Latin1 and Group probers. 00068 // both functions Allocate a new buffer for newBuf. This buffer should be 00069 // freed by the caller using PR_FREEIF. 00070 // Both functions return PR_FALSE in case of memory allocation failure. 00071 static PRBool FilterWithoutEnglishLetters(const char* aBuf, PRUint32 aLen, char** newBuf, PRUint32& newLen); 00072 static PRBool FilterWithEnglishLetters(const char* aBuf, PRUint32 aLen, char** newBuf, PRUint32& newLen); 00073 00074 }; 00075 00076 #endif /* nsCharSetProber_h__ */