사용자 도구

사이트 도구


kb:ternarysearchtree

Ternary Search Tree

사전을 만들어 보아요.

소스

http://www.octavian.org/cs/software.html 에서 받은 소스를 템플릿 버전으로 변경한 후, 한글 관련 처리와, near-neightbor 서치 기능을 집어넣은 소스다.

////////////////////////////////////////////////////////////////////////////////
/// \file TernarySearchTree.h
/// \author excel96
/// \date 2006.7.24
///
/// http://www.octavian.org/cs/software.html 페이지 참고
////////////////////////////////////////////////////////////////////////////////
 
#ifndef __TERNARYSEARCHTREE_H__
#define __TERNARYSEARCHTREE_H__
 
#include <vector>
 
template <typename T>
struct ptr_delete_policy { void operator()(T ptr) { delete ptr; } };
 
template <typename T>
struct array_delete_policy { void operator()(T ptr) { delete [] ptr; } };
 
template <typename T>
struct no_delete_policy { void operator()(T ptr) {} };
 
 
////////////////////////////////////////////////////////////////////////////////
/// \class cTernarySearchTree
/// \brief Ternary Search Tree
///
/// http://www.octavian.org/cs/software.html 페이지 참고
////////////////////////////////////////////////////////////////////////////////
 
template 
<
    typename T, 
    template <typename> class DP = no_delete_policy
>
class cTernarySearchTree
{
public:
    /// 각종 상수.
    enum { TST_OK, TST_ERROR, TST_NULL_KEY, TST_DUPLICATE_KEY };
 
    /// 횡단 콜백 함수.
    typedef void (*PFNTRAVERSE)(T* data);
 
    /// 매치를 반환하기 위한 컬렉션.
    typedef std::vector<T*> MATCHES;
 
 
private:
    /// 트리 기본 노드.
    class cNode
    {
    public:
        char   Value;  ///< 문자열 값.
        cNode* Left;   ///< 왼쪽 자식.
        cNode* Middle; ///< 중간 자식 or 문자열과 함께 들어온 포인터.
        cNode* Right;  ///< 오른쪽 자식.
 
        /// \brief 생성자.
        cNode(char v = 0, cNode* l = NULL, cNode* m = NULL, cNode* r = NULL)
            : Value(v), Left(l), Middle(m), Right(r)
        {
        }
    };
 
    /// 노드 풀.
    class cNodeLines
    {
    public:
        cNode*      Line; ///< 노드들.
        cNodeLines* Next; ///< 다음 풀.
 
        /// \brief 생성자.
        cNodeLines(size_t width, cNodeLines* next = NULL)
            : Line(new cNode[width]), Next(next)
        {
            cNode* current = Line;
            for (size_t i = 1; i < width; i++)
            {
                current->Middle = &Line[i];
                current = current->Middle;
            }
 
            current->Middle = NULL;
        }
 
        /// \brief 소멸자.
        ~cNodeLines()
        {
            if (Line) { delete [] Line; }
            if (Next) { delete Next; }
        }
    };
 
 
private:
    size_t      m_NodeLineWidth; ///< 노드가 모자랄 때마다 한번에 할당할 노드의 갯수.
    cNodeLines* m_NodeLines;     ///< 노드 풀.
    cNode*      m_FreeList;      ///< 노드 풀 안의 인덱스.
    cNode*      m_Head[256];     ///< 루트 노드들.
 
 
public:
    /// \brief 생성자.
    cTernarySearchTree(size_t nodeLineWidth=16);
 
    /// \brief 소멸자.
    virtual ~cTernarySearchTree();
 
 
public:
    /// \brief 새로운 문자열을 집어넣는다.
    int Insert(const char* key, T* data, bool replace = false, T** prevEntry = NULL);
 
    /// \brief 문자열을 검색한다.
    T* Search(const char* key) const;
 
    /// \brief 근접 문자열을 검색한다.
    void NearSearch(const char* key, int distance, MATCHES& matches) const;
 
    /// \brief 문자열을 삭제한다.
    T* Delete(const char* key);
 
    /// \brief 모든 아이템에 대해 주어진 함수를 실행한다.
    void Traverse(PFNTRAVERSE callback);
 
 
private:
    /// \brief 프리 리스트의 크기를 증가시킨다.
    void GrowNodeFreeList();
 
    /// \brief 근접 문자열을 검색한다.
    void NearSearch(cNode* node, const char* key, int distance, MATCHES& matches) const;
 
    /// \brief 모든 아이템에 대해 주어진 함수를 실행한다.
    void Traverse(cNode* node, PFNTRAVERSE callback);
 
    /// \brief 삭제 함수.
    static void Delete(T* data);
};
 
 
////////////////////////////////////////////////////////////////////////////////
/// \brief 생성자.
/// \param nodeLineWidth 한번에 할당할 노드의 갯수.
////////////////////////////////////////////////////////////////////////////////
template <typename T, template <typename> class DP>
cTernarySearchTree<T, DP>::cTernarySearchTree(size_t nodeLineWidth)
: m_NodeLineWidth(nodeLineWidth), 
m_NodeLines(new cNodeLines(nodeLineWidth)),
m_FreeList(NULL)
{
    memset(m_Head, 0, sizeof(m_Head));
    m_FreeList = m_NodeLines->Line;
}
 
////////////////////////////////////////////////////////////////////////////////
/// \brief 소멸자.
////////////////////////////////////////////////////////////////////////////////
template <typename T, template <typename> class DP>
cTernarySearchTree<T, DP>::~cTernarySearchTree()
{
    Traverse(Delete);
 
    if (m_NodeLines)
        delete m_NodeLines;
}
 
////////////////////////////////////////////////////////////////////////////////
/// \brief 새로운 문자열을 집어넣는다.
/// \param key 문자열.
/// \param data 문자열과 함께 할당할 데이터.
/// \param replace 겹치는 데이터가 있다면 교체할 것인가의 여부.
/// \param prevEntry 겹치는 데이터가 있을 경우, 데이터 값을 집어넣을 변수.
/// \return int 정상적인 경우 TST_OK, 에러가 생긴 경우에는 다른 값을 반환한다.
////////////////////////////////////////////////////////////////////////////////
template <typename T, template <typename> class DP>
int cTernarySearchTree<T, DP>::Insert(
    const char* key, T* data, bool replace, T** prevEntry)
{
    cNode* currentNode = NULL;
    cNode* newNodeTreeBegin = NULL;
    int keyIndex = 0;
    bool performLoop = true;
 
    if (key == NULL)
        return TST_NULL_KEY;
 
    if (key[0] == 0)
        return TST_NULL_KEY;
 
    if (m_Head[(unsigned char)key[0]] == NULL)
    {
        if (m_FreeList == NULL)
            GrowNodeFreeList();
 
        m_Head[(unsigned char)key[0]] = m_FreeList;
        m_FreeList = m_FreeList->Middle;
        currentNode = m_Head[(unsigned char)key[0]];
        currentNode->Value = key[1];
        if (key[1] == 0)
        {
            currentNode->Middle = (cNode*)data; // argh
            return TST_OK;
        }
        else
            performLoop = false;
    }
 
    currentNode = m_Head[(unsigned char)key[0]];
    keyIndex = 1;
    while (performLoop)
    {
        if (key[keyIndex] == currentNode->Value)
        {
            if (key[keyIndex] == 0)
            {
                if (replace)
                {
                    if (prevEntry)
                        *prevEntry = (T*)currentNode->Middle;
 
                    currentNode->Middle = (cNode*)data; // argh
 
                    return TST_OK;
                }
                else
                {
                    if (prevEntry)
                        *prevEntry = (T*)currentNode->Middle;
 
                    return TST_DUPLICATE_KEY;
                }
            }
            else
            {
                if (currentNode->Middle == NULL)
                {
                    if (m_FreeList == NULL)
                        GrowNodeFreeList();
 
                    currentNode->Middle = m_FreeList;
                    m_FreeList = m_FreeList->Middle;
                    newNodeTreeBegin = currentNode;
                    currentNode = currentNode->Middle;
                    currentNode->Value = key[keyIndex];
                    break;
                }
                else
                {
                    currentNode = currentNode->Middle;
                    keyIndex++;
                    continue;
                }
            }
        }
 
        if ( ((currentNode->Value == 0) && (key[keyIndex] < 64)) ||
            ((currentNode->Value != 0) && (key[keyIndex] < currentNode->Value)) )
        {
            if (currentNode->Left == NULL)
            {
                if (m_FreeList == NULL)
                    GrowNodeFreeList();
 
                currentNode->Left = m_FreeList;
                m_FreeList = m_FreeList->Middle;
                newNodeTreeBegin = currentNode;
                currentNode = currentNode->Left;
                currentNode->Value = key[keyIndex];
                if (key[keyIndex] == 0)
                {
                    currentNode->Middle = (cNode*)data; // argh
                    return TST_OK;
                }
                else
                {
                    break;
                }
            }
            else
            {
                currentNode = currentNode->Left;
                continue;
            }
        }
        else
        {
            if (currentNode->Right == NULL)
            {
                if (m_FreeList == NULL)
                    GrowNodeFreeList();
 
                currentNode->Right = m_FreeList;
                m_FreeList = m_FreeList->Middle;
                newNodeTreeBegin = currentNode;
                currentNode = currentNode->Right;
                currentNode->Value = key[keyIndex];
                break;
            }
            else
            {
                currentNode = currentNode->Right;
                continue;
            }
        }
    }
 
    do
    {
        keyIndex++;
 
        if (m_FreeList == NULL)
            GrowNodeFreeList();
 
        currentNode->Middle = m_FreeList;
        m_FreeList = m_FreeList->Middle;
        currentNode = currentNode->Middle;
        currentNode->Value = key[keyIndex];
    } while (key[keyIndex] != 0);
 
    currentNode->Middle = (cNode*)data; // argh
 
    return TST_OK;
}
 
////////////////////////////////////////////////////////////////////////////////
/// \brief 문자열을 검색한다.
/// \param key 검색할 문자열.
/// \return T* 해당하는 데이터가 있는 경우에는 그 데이터를 반환하고,
/// 없는 경우에는 NULL을 반환한다.
////////////////////////////////////////////////////////////////////////////////
template <typename T, template <typename> class DP>
T* cTernarySearchTree<T, DP>::Search(const char* key) const
{
    if (key[0] == 0)
        return NULL;
 
    if (m_Head[(unsigned char)key[0]] == NULL)
        return NULL;
 
    cNode* currentNode = m_Head[(unsigned char)key[0]];
    int keyIndex = 1;
 
    while (currentNode != NULL)
    {
        if (key[keyIndex] == currentNode->Value)
        {
            if (currentNode->Value == 0)
                return currentNode->Middle;
            else
            {
                currentNode = currentNode->Middle;
                keyIndex++;
                continue;
            }
        }
        else if (
            (currentNode->Value == 0 && key[keyIndex] < 64)
            ||
            (currentNode->Value != 0 && key[keyIndex] < currentNode->Value)
            )
        {
            currentNode = currentNode->Left;
            continue;
        }
        else
        {
            currentNode = currentNode->Right;
            continue;
        }
    }
 
    return NULL;
}
 
////////////////////////////////////////////////////////////////////////////////
/// \brief 근접 문자열을 검색한다.
/// \param key 문자열.
/// \param distance 틀려도 되는 글자의 갯수.
/// \param matches 결과를 찾은 경우 그 결과를 집어넣을 컬렉션.
////////////////////////////////////////////////////////////////////////////////
template <typename T, template <typename> class DP>
void cTernarySearchTree<T, DP>::NearSearch(
    const char* key, int distance, MATCHES& matches) const
{
    NearSearch(m_Head[(unsigned char)key[0]], key + 1, distance, matches);
}
 
////////////////////////////////////////////////////////////////////////////////
/// \brief 근접 문자열을 검색한다.
/// \param node 검색의 기준이 되는 노드.
/// \param key 문자열.
/// \param distance 틀려도 되는 글자의 갯수.
/// \param matches 결과를 찾은 경우 그 결과를 집어넣을 컬렉션.
////////////////////////////////////////////////////////////////////////////////
template <typename T, template <typename> class DP>
void cTernarySearchTree<T, DP>::NearSearch(
    cNode* node, const char* key, int distance, MATCHES& matches) const
{
    if (!node || distance < 0) return; 
 
    if (distance > 0 || ((unsigned char)(*key)) < ((unsigned char)(node->Value))) 
        NearSearch(node->Left, key, distance, matches); 
 
    if (node->Value == 0) 
    { 
        if ((int)strlen(key) <= distance) 
            matches.push_back((T*)node->Middle);
    } 
    else 
    {
        NearSearch(
            node->Middle, 
            *key ? key + 1 : key, 
            (*key == node->Value) ? distance : distance - 1,
            matches
            ); 
    }
 
    if (distance > 0 || ((unsigned char)(*key)) > ((unsigned char)(node->Value))) 
        NearSearch(node->Right, key, distance, matches); 
}
 
////////////////////////////////////////////////////////////////////////////////
/// \brief 문자열을 삭제한다.
/// \param key 삭제할 문자열.
/// \return T* 문자열과 함께 들어왔던 포인터.
////////////////////////////////////////////////////////////////////////////////
template <typename T, template <typename> class DP>
T* cTernarySearchTree<T, DP>::Delete(const char* key)
{
    cNode* currentNode = NULL;
    cNode* currentNodeParent = NULL;
    cNode* lastBranch = NULL;
    cNode* lastBranchParent = NULL;
    cNode* nextNode = NULL;
    cNode* lastBranchReplacement = NULL;
    cNode* lastBranchDanglingChild = NULL;
    int keyIndex = 0;
 
    if (key[0] == 0)
        return NULL;
 
    if (m_Head[(unsigned char)key[0]] == NULL)
        return NULL;
 
    lastBranch = NULL;
    lastBranchParent = NULL;
    currentNode = m_Head[(unsigned char)key[0]];
    currentNodeParent = NULL;
    keyIndex = 1;
 
    while (currentNode != NULL)
    {
        if (key[keyIndex] == currentNode->Value)
        {
            if (currentNode->Left != NULL || currentNode->Right != NULL)
            {
                lastBranch = currentNode;
                lastBranchParent = currentNodeParent;
            }
 
            if (key[keyIndex] == 0)
            {
                break;
            }
            else
            {
                currentNodeParent = currentNode;
                currentNode = currentNode->Middle;
                keyIndex++;
                continue;
            }
        }
        else if ( 
            (currentNode->Value == 0 && key[keyIndex] < 64) 
            ||
            (currentNode->Value != 0 && key[keyIndex] < currentNode->Value)
            )
        {
            lastBranchParent = currentNode;
            currentNodeParent = currentNode;
            currentNode = currentNode->Left;
            lastBranch = currentNode;
            continue;
        }
        else
        {
            lastBranchParent = currentNode;
            currentNodeParent = currentNode;
            currentNode = currentNode->Right;
            lastBranch = currentNode;
            continue;
        }
 
    }
 
    if (currentNode == NULL)
        return NULL;
 
    if (lastBranch == NULL)
    {
        nextNode = m_Head[(unsigned char)key[0]];
        m_Head[(unsigned char)key[0]] = NULL;
    }
    else if ( (lastBranch->Left == NULL) && (lastBranch->Right == NULL) )
    {
        if (lastBranchParent->Left == lastBranch)
            lastBranchParent->Left = NULL;
        else
            lastBranchParent->Right = NULL;
 
        nextNode = lastBranch;
    }
    else
    {
        if (lastBranch->Left != NULL && lastBranch->Right != NULL)
        {
            lastBranchReplacement = lastBranch->Right;
            lastBranchDanglingChild = lastBranch->Left;
        }
        else if (lastBranch->Right != NULL)
        {
            lastBranchReplacement = lastBranch->Right;
            lastBranchDanglingChild = NULL;
        }
        else
        {
            lastBranchReplacement = lastBranch->Left;
            lastBranchDanglingChild = NULL;
        }
 
        if (lastBranchParent == NULL)
        {
            m_Head[(unsigned char)key[0]]=lastBranchReplacement;
        }
        else
        {
            if (lastBranchParent->Left == lastBranch)
                lastBranchParent->Left = lastBranchReplacement;
            else if (lastBranchParent->Right == lastBranch)
                lastBranchParent->Right = lastBranchReplacement;
            else
                lastBranchParent->Middle = lastBranchReplacement;
        }
 
        if (lastBranchDanglingChild != NULL)
        {
            currentNode = lastBranchReplacement;
 
            while (currentNode->Left != NULL)
                currentNode = currentNode->Left;
 
            currentNode->Left = lastBranchDanglingChild;
        }
 
        nextNode = lastBranch;
    }
 
    do
    {
        currentNode = nextNode;
        nextNode = currentNode->Middle;
        currentNode->Left = NULL;
        currentNode->Right = NULL;
        currentNode->Middle = m_FreeList;
        m_FreeList = currentNode;
    }
    while (currentNode->Value != 0);
 
    return nextNode;
}
 
////////////////////////////////////////////////////////////////////////////////
/// \brief 모든 아이템에 대해 주어진 함수를 실행한다.
/// \param callback 실행할 함수.
////////////////////////////////////////////////////////////////////////////////
template <typename T, template <typename> class DP>
void cTernarySearchTree<T, DP>::Traverse(PFNTRAVERSE callback)
{
    for (int i=0; i<256; ++i)
        Traverse(m_Head[i], callback);
}
 
////////////////////////////////////////////////////////////////////////////////
/// \brief 모든 아이템에 대해 주어진 함수를 실행한다.
/// \param node 노드.
/// \param callback 실행할 함수.
////////////////////////////////////////////////////////////////////////////////
template <typename T, template <typename> class DP>
void cTernarySearchTree<T, DP>::Traverse(cNode* node, PFNTRAVERSE callback)
{
    if (!node) return;
 
    Traverse(node->Left, callback);
 
    if (node->Value != 0)
        Traverse(node->Middle, callback);
    else
        callback((T*)node->Middle);
 
    Traverse(node->Right, callback);
}
 
////////////////////////////////////////////////////////////////////////////////
/// \brief 프리 리스트의 크기를 증가시킨다.
////////////////////////////////////////////////////////////////////////////////
template <typename T, template <typename> class DP>
void cTernarySearchTree<T, DP>::GrowNodeFreeList()
{
    m_NodeLines = new cNodeLines(m_NodeLineWidth, m_NodeLines);
    m_FreeList = m_NodeLines->Line;
}
 
////////////////////////////////////////////////////////////////////////////////
/// \brief 삭제 함수.
/// \param data 삭제할 객체.
////////////////////////////////////////////////////////////////////////////////
template <typename T, template <typename> class DP>
void cTernarySearchTree<T, DP>::Delete(T* data)
{
    DP<T*> doDelete;
    doDelete(data);
}
 
#endif

예제

cTernarySearchTree<std::string, ptr_delete_policy> t;
t.Insert("hello", new std::string("hello"));
t.Insert("world", new std::string("world"));
 
cTernarySearchTree<std::string>::MATCHES matches;
t.NearSearch("h", 4, matches);
for (size_t i=0; i<matches.size(); ++i)
{
    std::string* str = matches[i];
    std::cout << *str << endl;
}

링크

kb/ternarysearchtree.txt · 마지막으로 수정됨: 2014/11/09 20:57 (바깥 편집)