149 lines
5.5 KiB
C++

/////////////////////////////////////////////////////////////////////////////
// Name: wx/tokenzr.h
// Purpose: String tokenizer - a C++ replacement for strtok(3)
// Author: Guilhem Lavaux
// Modified by: (or rather rewritten by) Vadim Zeitlin
// Created: 04/22/98
// Copyright: (c) Guilhem Lavaux
// Licence: wxWindows licence
/////////////////////////////////////////////////////////////////////////////
#ifndef _WX_TOKENZRH
#define _WX_TOKENZRH
#include "wx/object.h"
#include "wx/string.h"
#include "wx/arrstr.h"
// ----------------------------------------------------------------------------
// constants
// ----------------------------------------------------------------------------
// Default set of delimiters: the usual white space characters, i.e. space,
// horizontal tab, carriage return and line feed. It is used as the default
// value of the "delims" parameter of the tokenizer functions declared below.
#define wxDEFAULT_DELIMITERS (wxT(" \t\r\n"))
// wxStringTokenizer mode flags which determine its behaviour
enum wxStringTokenizerMode
{
    // mode of a default-constructed tokenizer, in effect until SetString()
    // is called
    wxTOKEN_INVALID = -1,

    // act like strtok() when the delimiters are whitespace and like
    // wxTOKEN_RET_EMPTY otherwise
    wxTOKEN_DEFAULT = 0,

    // empty tokens in the middle of the string are returned
    wxTOKEN_RET_EMPTY = 1,

    // trailing empty tokens are returned as well
    wxTOKEN_RET_EMPTY_ALL = 2,

    // each token is returned together with its delimiter (this implies
    // wxTOKEN_RET_EMPTY)
    wxTOKEN_RET_DELIMS = 3,

    // act exactly like strtok(3)
    wxTOKEN_STRTOK = 4
};
// ----------------------------------------------------------------------------
// wxStringTokenizer: replaces infamous strtok() and has some other features
// ----------------------------------------------------------------------------
class WXDLLIMPEXP_BASE wxStringTokenizer : public wxObject
{
public:
    // ctors and initializers
    // ----------------------

    // Default ctor: the tokenizer stays in wxTOKEN_INVALID mode until
    // SetString() is called.
    //
    // Every member is given a well-defined value here (in declaration order)
    // so that the inline accessors below -- GetLastDelimiter(), GetPosition()
    // and GetString() -- never read indeterminate data when they are called
    // before SetString(): the last delimiter is NUL, the position is 0 and
    // the remaining string is empty.
    wxStringTokenizer()
        : m_hasMoreTokens(MoreTokens_Unknown),
          m_stringEnd(m_string.end()),
          m_delimsLen(0),
          m_pos(m_string.begin()),
          m_mode(wxTOKEN_INVALID),
          m_lastDelim(wxT('\0'))
    {
    }

    // ctor which gives us the string
    //
    //  str     the string to tokenize
    //  delims  the characters separating the tokens
    //  mode    one of the wxTOKEN_XXX values
    wxStringTokenizer(const wxString& str,
                      const wxString& delims = wxDEFAULT_DELIMITERS,
                      wxStringTokenizerMode mode = wxTOKEN_DEFAULT);

    // args are same as for the non default ctor above
    void SetString(const wxString& str,
                   const wxString& delims = wxDEFAULT_DELIMITERS,
                   wxStringTokenizerMode mode = wxTOKEN_DEFAULT);

    // reinitialize the tokenizer with the same delimiters/mode
    void Reinit(const wxString& str);


    // tokens access
    // -------------

    // return the number of remaining tokens
    size_t CountTokens() const;

    // did we reach the end of the string?
    bool HasMoreTokens() const;

    // get the next token, will return empty string if !HasMoreTokens()
    wxString GetNextToken();

    // get the delimiter which terminated the token last retrieved by
    // GetNextToken() or NUL if there had been no tokens yet or the last
    // one wasn't terminated (but ran to the end of the string)
    wxChar GetLastDelimiter() const { return m_lastDelim; }


    // get current tokenizer state
    // ---------------------------

    // returns the part of the string which remains to tokenize (*not* the
    // initial string), i.e. everything from the current position to the end
    wxString GetString() const { return wxString(m_pos, m_string.end()); }

    // returns the current position (i.e. one index after the last
    // returned token or 0 if GetNextToken() has never been called) in the
    // original string
    size_t GetPosition() const { return m_pos - m_string.begin(); }


    // misc
    // ----

    // get the current mode - can be different from the one passed to the
    // ctor if it was wxTOKEN_DEFAULT
    wxStringTokenizerMode GetMode() const { return m_mode; }

    // do we return empty tokens? (true in every mode but wxTOKEN_STRTOK)
    bool AllowEmpty() const { return m_mode != wxTOKEN_STRTOK; }


    // backwards compatibility section from now on
    // -------------------------------------------

    // for compatibility only, use GetNextToken() instead
    wxString NextToken() { return GetNextToken(); }

    // compatibility only, don't use
    //
    // notice that the boolean argument is ignored: this overload always
    // selects wxTOKEN_RET_DELIMS mode
    void SetString(const wxString& to_tokenize,
                   const wxString& delims,
                   bool WXUNUSED(ret_delim))
    {
        SetString(to_tokenize, delims, wxTOKEN_RET_DELIMS);
    }

    // compatibility only, don't use
    //
    // the bool argument selects the compatibility SetString() overload above,
    // so ret_delim has no effect here neither
    wxStringTokenizer(const wxString& to_tokenize,
                      const wxString& delims,
                      bool ret_delim)
    {
        SetString(to_tokenize, delims, ret_delim);
    }

protected:
    // has the tokenizer been given a string to work on, i.e. is its mode
    // anything other than wxTOKEN_INVALID?
    bool IsOk() const { return m_mode != wxTOKEN_INVALID; }

    bool DoHasMoreTokens() const;

    // tri-state value stored in m_hasMoreTokens
    enum MoreTokensState
    {
        MoreTokens_Unknown,
        MoreTokens_Yes,
        MoreTokens_No
    };

    // NOTE(review): no copy ctor or assignment operator is declared, so the
    // implicitly generated ones copy m_pos and m_stringEnd as they are --
    // confirm that such iterators remain valid for the copy's own m_string.

    MoreTokensState m_hasMoreTokens;

    wxString m_string;              // the string we tokenize
    wxString::const_iterator m_stringEnd;
    // FIXME-UTF8: use wxWcharBuffer
    wxWxCharBuffer m_delims;        // all possible delimiters
    size_t m_delimsLen;

    wxString::const_iterator m_pos; // the current position in m_string

    wxStringTokenizerMode m_mode;   // see wxTOKEN_XXX values

    wxChar   m_lastDelim;           // delimiter after last token or '\0'
};
// ----------------------------------------------------------------------------
// convenience function which returns all tokens at once
// ----------------------------------------------------------------------------
// Tokenize the whole string in a single call.
//
// The function takes the same parameters, with the same default values, as
// the wxStringTokenizer ctor taking the string to tokenize and returns the
// array containing all the tokens.
wxArrayString WXDLLIMPEXP_BASE
wxStringTokenize(const wxString& str,
const wxString& delims = wxDEFAULT_DELIMITERS,
wxStringTokenizerMode mode = wxTOKEN_DEFAULT);
#endif // _WX_TOKENZRH