nsCharSetProber Class Reference

#include <nsCharSetProber.h>

Inheritance diagram for nsCharSetProber:
Inheritance graph
[legend]

Public Member Functions

virtual ~nsCharSetProber ()
virtual const char * GetCharSetName ()=0
virtual nsProbingState HandleData (const char *aBuf, PRUint32 aLen)=0
virtual nsProbingState GetState (void)=0
virtual void Reset (void)=0
virtual float GetConfidence (void)=0
virtual void SetOpion ()=0

Static Public Member Functions

static PRBool FilterWithoutEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)
static PRBool FilterWithEnglishLetters (const char *aBuf, PRUint32 aLen, char **newBuf, PRUint32 &newLen)

Detailed Description

Definition at line 53 of file nsCharSetProber.h.


Constructor & Destructor Documentation

virtual nsCharSetProber::~nsCharSetProber (  )  [inline, virtual]

Definition at line 55 of file nsCharSetProber.h.

00055 {};


Member Function Documentation

PRBool nsCharSetProber::FilterWithEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen 
) [static]

Definition at line 83 of file nsCharSetProber.cpp.

References PR_FALSE, PR_Malloc, and PR_TRUE.

Referenced by nsLatin1Prober::HandleData().

00084 {
00085   //do filtering to reduce load to probers
00086   char *newptr;
00087   char *prevPtr, *curPtr;
00088   PRBool isInTag = PR_FALSE;
00089 
00090   newptr = *newBuf = (char*)PR_Malloc(aLen);
00091   if (!newptr)
00092     return PR_FALSE;
00093 
00094   for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
00095   {
00096     if (*curPtr == '>')
00097       isInTag = PR_FALSE;
00098     else if (*curPtr == '<')
00099       isInTag = PR_TRUE;
00100 
00101     if (!(*curPtr & 0x80) &&
00102         (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') )
00103     {
00104       if (curPtr > prevPtr && !isInTag) // Current segment contains more than just a symbol 
00105                                         // and it is not inside a tag, keep it.
00106       {
00107         while (prevPtr < curPtr) *newptr++ = *prevPtr++;  
00108         prevPtr++;
00109         *newptr++ = ' ';
00110       }
00111       else
00112         prevPtr = curPtr+1;
00113     }
00114   }
00115 
00116   // If the current segment contains more than just a symbol 
00117   // and it is not inside a tag then keep it.
00118   if (!isInTag)
00119     while (prevPtr < curPtr)
00120       *newptr++ = *prevPtr++;  
00121 
00122   newLen = newptr - *newBuf;
00123 
00124   return PR_TRUE;
00125 }

Here is the caller graph for this function:

PRBool nsCharSetProber::FilterWithoutEnglishLetters ( const char *  aBuf,
PRUint32  aLen,
char **  newBuf,
PRUint32 &  newLen 
) [static]

Definition at line 43 of file nsCharSetProber.cpp.

References PR_FALSE, PR_Malloc, and PR_TRUE.

Referenced by nsSBCSGroupProber::HandleData().

00044 {
00045   char *newptr;
00046   char *prevPtr, *curPtr;
00047   
00048   PRBool meetMSB = PR_FALSE;   
00049   newptr = *newBuf = (char*)PR_Malloc(aLen);
00050   if (!newptr)
00051     return PR_FALSE;
00052 
00053   for (curPtr = prevPtr = (char*)aBuf; curPtr < aBuf+aLen; curPtr++)
00054   {
00055     if (*curPtr & 0x80)
00056     {
00057       meetMSB = PR_TRUE;
00058     }
00059     else if (*curPtr < 'A' || (*curPtr > 'Z' && *curPtr < 'a') || *curPtr > 'z') 
00060     {
00061       //current char is a symbol, most likely a punctuation. we treat it as segment delimiter
00062       if (meetMSB && curPtr > prevPtr) 
00063       //this segment contains more than single symbol, and it has upper ASCII, we need to keep it
00064       {
00065         while (prevPtr < curPtr) *newptr++ = *prevPtr++;  
00066         prevPtr++;
00067         *newptr++ = ' ';
00068         meetMSB = PR_FALSE;
00069       }
00070       else //ignore current segment. (either because it is just a symbol or just an English word)
00071         prevPtr = curPtr+1;
00072     }
00073   }
00074   if (meetMSB && curPtr > prevPtr) 
00075     while (prevPtr < curPtr) *newptr++ = *prevPtr++;  
00076 
00077   newLen = newptr - *newBuf;
00078 
00079   return PR_TRUE;
00080 }

Here is the caller graph for this function:

virtual const char* nsCharSetProber::GetCharSetName (  )  [pure virtual]
virtual float nsCharSetProber::GetConfidence ( void   )  [pure virtual]
virtual nsProbingState nsCharSetProber::GetState ( void   )  [pure virtual]
virtual nsProbingState nsCharSetProber::HandleData ( const char *  aBuf,
PRUint32  aLen 
) [pure virtual]
virtual void nsCharSetProber::Reset ( void   )  [pure virtual]
virtual void nsCharSetProber::SetOpion (  )  [pure virtual]

The documentation for this class was generated from the following files:

  * nsCharSetProber.h
  * nsCharSetProber.cpp
Generated by  doxygen 1.6.2-20100208