유니코드 한글 자소 문자 처리


한글 음절 문자를 자소 단위로 분해하거나, 반대로 자소 문자들을 합쳐 하나의 음절 문자로 만드는 자바 소스 코드이다. 이에 관한 간략한 배경 지식이 필요한 경우, 이 글을 참고하기 바란다.

package kr.pe.elex.hangeul;
/**
 * Created by Elex on 2014-07-11.
 *
 * <p>Represents a single Hangul character and converts between a precomposed
 * Hangul syllable (U+AC00..U+D7A3) and its constituent jamo (initial consonant,
 * medial vowel and optional final consonant).
 *
 * <p>An instance can be built either from one character (a syllable, a
 * conjoining jamo or a compatibility jamo) or from two/three jamo that are
 * combined into a syllable. Compatibility jamo (U+3131..U+318E) are accepted
 * as input for composition and are converted to conjoining jamo first.
 *
 * <p>Instances are immutable.
 */
public class HCharacter {

    // Hangul syllables (precomposed). The last assigned syllable is U+D7A3;
    // U+D7A4..U+D7AF are unassigned and must not be treated as syllables.
    protected static final char HANGEUL_SYLLABLE_BEGIN = '\uAC00';
    protected static final char HANGEUL_SYLLABLE_END = '\uD7A3';

    // Hangul conjoining jamo block
    protected static final char HANGEUL_JAMO_BEGIN = '\u1100';
    protected static final char HANGEUL_JAMO_END = '\u11FF';

    protected static final char HANGEUL_JAMO_CHOSEONG_BEGIN = '\u1100';
    protected static final char HANGEUL_JAMO_CHOSEONG_END = '\u1112';
    protected static final char HANGEUL_JAMO_CHOSEONG_OLD_BEGIN = '\u1113';
    protected static final char HANGEUL_JAMO_CHOSEONG_OLD_END = '\u115F';
    protected static final char HANGEUL_JAMO_JUNGSEONG_BEGIN = '\u1161';
    protected static final char HANGEUL_JAMO_JUNGSEONG_END = '\u1175';
    protected static final char HANGEUL_JAMO_JUNGSEONG_OLD_BEGIN = '\u1176';
    protected static final char HANGEUL_JAMO_JUNGSEONG_OLD_END = '\u11A7';
    protected static final char HANGEUL_JAMO_JONGSEONG_BEGIN = '\u11A8';
    protected static final char HANGEUL_JAMO_JONGSEONG_END = '\u11C2';
    protected static final char HANGEUL_JAMO_JONGSEONG_OLD_BEGIN = '\u11C3';
    protected static final char HANGEUL_JAMO_JONGSEONG_OLD_END = '\u11FF';

    // Hangul compatibility jamo block
    protected static final char HANGEUL_COMPAT_JAMO_BEGIN = '\u3131';
    protected static final char HANGEUL_COMPAT_JAMO_END = '\u318E';

    protected static final char HANGEUL_COMPAT_JAMO_CONSONANT_BEGIN = '\u3131';
    protected static final char HANGEUL_COMPAT_JAMO_CONSONANT_END = '\u314E';
    protected static final char HANGEUL_COMPAT_JAMO_VOWEL_BEGIN = '\u314F';
    protected static final char HANGEUL_COMPAT_JAMO_VOWEL_END = '\u3163';
    protected static final char HANGEUL_COMPAT_JAMO_CONSONANT_OLD_BEGIN = '\u3165';
    protected static final char HANGEUL_COMPAT_JAMO_CONSONANT_OLD_END = '\u3186';
    protected static final char HANGEUL_COMPAT_JAMO_VOWEL_OLD_BEGIN = '\u3187';
    protected static final char HANGEUL_COMPAT_JAMO_VOWEL_OLD_END = '\u318E';

    // Individual modern initial consonants (choseong)
    static final char HANGUL_CHOSEONG_KIYEOK = '\u1100';
    static final char HANGUL_CHOSEONG_SSANGKIYEOK = '\u1101';
    static final char HANGUL_CHOSEONG_NIEUN = '\u1102';
    static final char HANGUL_CHOSEONG_TIKEUT = '\u1103';
    static final char HANGUL_CHOSEONG_SSANGTIKEUT = '\u1104';
    static final char HANGUL_CHOSEONG_RIEUL = '\u1105';
    static final char HANGUL_CHOSEONG_MIEUM = '\u1106';
    static final char HANGUL_CHOSEONG_PIEUP = '\u1107';
    static final char HANGUL_CHOSEONG_SSANGPIEUP = '\u1108';
    static final char HANGUL_CHOSEONG_SIOS = '\u1109';
    static final char HANGUL_CHOSEONG_SSANGSIOS = '\u110A';
    static final char HANGUL_CHOSEONG_IEUNG = '\u110B';
    static final char HANGUL_CHOSEONG_CIEUC = '\u110C';
    static final char HANGUL_CHOSEONG_SSANGCIEUC = '\u110D';
    static final char HANGUL_CHOSEONG_CHIEUCH = '\u110E';
    static final char HANGUL_CHOSEONG_KHIEUKH = '\u110F';
    static final char HANGUL_CHOSEONG_THIEUTH = '\u1110';
    static final char HANGUL_CHOSEONG_PHIEUPH = '\u1111';
    static final char HANGUL_CHOSEONG_HIEUH = '\u1112';

    // Individual modern medial vowels (jungseong)
    static final char HANGUL_JUNGSEONG_A = '\u1161';
    static final char HANGUL_JUNGSEONG_AE = '\u1162';
    static final char HANGUL_JUNGSEONG_YA = '\u1163';
    static final char HANGUL_JUNGSEONG_YAE = '\u1164';
    static final char HANGUL_JUNGSEONG_EO = '\u1165';
    static final char HANGUL_JUNGSEONG_E = '\u1166';
    static final char HANGUL_JUNGSEONG_YEO = '\u1167';
    static final char HANGUL_JUNGSEONG_YE = '\u1168';
    static final char HANGUL_JUNGSEONG_O = '\u1169';
    static final char HANGUL_JUNGSEONG_WA = '\u116A';
    static final char HANGUL_JUNGSEONG_WAE = '\u116B';
    static final char HANGUL_JUNGSEONG_OE = '\u116C';
    static final char HANGUL_JUNGSEONG_YO = '\u116D';
    static final char HANGUL_JUNGSEONG_U = '\u116E';
    static final char HANGUL_JUNGSEONG_WEO = '\u116F';
    static final char HANGUL_JUNGSEONG_WE = '\u1170';
    static final char HANGUL_JUNGSEONG_WI = '\u1171';
    static final char HANGUL_JUNGSEONG_YU = '\u1172';
    static final char HANGUL_JUNGSEONG_EU = '\u1173';
    static final char HANGUL_JUNGSEONG_YI = '\u1174';
    static final char HANGUL_JUNGSEONG_I = '\u1175';

    // Individual modern final consonants (jongseong)
    static final char HANGUL_JONGSEONG_KIYEOK = '\u11A8';
    static final char HANGUL_JONGSEONG_SSANGKIYEOK = '\u11A9';
    static final char HANGUL_JONGSEONG_KIYEOK_SIOS = '\u11AA';
    static final char HANGUL_JONGSEONG_NIEUN = '\u11AB';
    static final char HANGUL_JONGSEONG_NIEUN_CIEUC = '\u11AC';
    static final char HANGUL_JONGSEONG_NIEUN_HIEUH = '\u11AD';
    static final char HANGUL_JONGSEONG_TIKEUT = '\u11AE';
    static final char HANGUL_JONGSEONG_RIEUL = '\u11AF';
    static final char HANGUL_JONGSEONG_RIEUL_KIYEOK = '\u11B0';
    static final char HANGUL_JONGSEONG_RIEUL_MIEUM = '\u11B1';
    static final char HANGUL_JONGSEONG_RIEUL_PIEUP = '\u11B2';
    static final char HANGUL_JONGSEONG_RIEUL_SIOS = '\u11B3';
    static final char HANGUL_JONGSEONG_RIEUL_THIEUTH = '\u11B4';
    static final char HANGUL_JONGSEONG_RIEUL_PHIEUPH = '\u11B5';
    static final char HANGUL_JONGSEONG_RIEUL_HIEUH = '\u11B6';
    static final char HANGUL_JONGSEONG_MIEUM = '\u11B7';
    static final char HANGUL_JONGSEONG_PIEUP = '\u11B8';
    static final char HANGUL_JONGSEONG_PIEUP_SIOS = '\u11B9';
    static final char HANGUL_JONGSEONG_SIOS = '\u11BA';
    static final char HANGUL_JONGSEONG_SSANGSIOS = '\u11BB';
    static final char HANGUL_JONGSEONG_IEUNG = '\u11BC';
    static final char HANGUL_JONGSEONG_CIEUC = '\u11BD';
    static final char HANGUL_JONGSEONG_CHIEUCH = '\u11BE';
    static final char HANGUL_JONGSEONG_KHIEUKH = '\u11BF';
    static final char HANGUL_JONGSEONG_THIEUTH = '\u11C0';
    static final char HANGUL_JONGSEONG_PHIEUPH = '\u11C1';
    static final char HANGUL_JONGSEONG_HIEUH = '\u11C2';

    /** Marker meaning "no jamo in this position" (for example, a syllable without a final consonant). */
    public static final char FILLER = 0;

    /** Number of medial vowels that take part in syllable composition. */
    private static final int JUNGSEONG_COUNT = 21;

    /** Number of final-consonant slots per initial/medial pair, including the "no final" slot. */
    private static final int JONGSEONG_COUNT = 28;

    /** HANGUL FILLER of the compatibility jamo block (U+3164), returned when no mapping exists. */
    private static final char HANGEUL_COMPAT_JAMO_FILLER = '\u3164';

    private final char character;
    private final char initialConsonant;
    private final char medialVowel;
    private final char finalConsonant;

    /**
     * Wraps a single Hangul character.
     *
     * <p>If {@code character} is a precomposed syllable it is decomposed into
     * its jamo. If it is a conjoining or compatibility jamo, it is stored as-is
     * and all three jamo components are set to {@link #FILLER}.
     *
     * @param character a Hangul syllable, conjoining jamo or compatibility jamo
     * @throws OutOfUnicodeRangeException if {@code character} is in none of those ranges
     */
    public HCharacter(char character) throws OutOfUnicodeRangeException {
        if (isSyllableLetter(character)) {
            this.character = character;
            this.initialConsonant = getChoSeong(character);
            this.medialVowel = getJungSeong(character);
            this.finalConsonant = getJongSeong(character);
        } else if (isJamoLetter(character) || isJamoCompatLetter(character)) {
            // A lone jamo has no decomposition.
            this.character = character;
            this.initialConsonant = FILLER;
            this.medialVowel = FILLER;
            this.finalConsonant = FILLER;
        } else {
            throw new OutOfUnicodeRangeException(character);
        }
    }

    /**
     * Composes a syllable from an initial consonant, a medial vowel and an
     * optional final consonant.
     *
     * <p>Each argument may be given as a conjoining jamo or as a compatibility
     * jamo; compatibility jamo are converted to conjoining jamo first. Archaic
     * jamo are rejected because they have no precomposed syllable.
     *
     * @param initialConsonant modern initial consonant
     * @param medialVowel      modern medial vowel
     * @param finalConsonant   modern final consonant, or {@link #FILLER} for none
     * @throws OutOfUnicodeRangeException if any argument is not a modern jamo
     *                                    valid for its position
     */
    public HCharacter(char initialConsonant, char medialVowel, char finalConsonant) throws OutOfUnicodeRangeException {
        // Convert compatibility jamo code points to conjoining jamo.
        char cho = compatToJamo(initialConsonant, CONV_INITIAL_CONSONANTS);
        char jung = medialVowel;
        if (inRange(medialVowel, HANGEUL_COMPAT_JAMO_VOWEL_BEGIN, HANGEUL_COMPAT_JAMO_VOWEL_END)) {
            // Modern compatibility vowels are laid out in the same order as jungseong.
            jung = (char) (medialVowel - HANGEUL_COMPAT_JAMO_VOWEL_BEGIN + HANGEUL_JAMO_JUNGSEONG_BEGIN);
        }
        char jong = compatToJamo(finalConsonant, CONV_FINAL_CONSONANTS);

        // Each jamo must lie in the modern range for its own position; anything
        // else (archaic jamo, a vowel used as a consonant, ...) cannot be composed.
        if (!inRange(cho, HANGEUL_JAMO_CHOSEONG_BEGIN, HANGEUL_JAMO_CHOSEONG_END)) {
            throw new OutOfUnicodeRangeException(cho);
        }
        if (!inRange(jung, HANGEUL_JAMO_JUNGSEONG_BEGIN, HANGEUL_JAMO_JUNGSEONG_END)) {
            throw new OutOfUnicodeRangeException(jung);
        }
        if (jong != FILLER && !inRange(jong, HANGEUL_JAMO_JONGSEONG_BEGIN, HANGEUL_JAMO_JONGSEONG_END)) {
            throw new OutOfUnicodeRangeException(jong);
        }

        this.initialConsonant = cho;
        this.medialVowel = jung;
        this.finalConsonant = jong;

        // Compose the syllable. Indices are kept as ints so that a final
        // consonant with index 0 (KIYEOK) is not confused with FILLER.
        int choIndex = cho - HANGEUL_JAMO_CHOSEONG_BEGIN;
        int jungIndex = jung - HANGEUL_JAMO_JUNGSEONG_BEGIN;
        int jongIndex = (jong == FILLER) ? 0 : jong - HANGEUL_JAMO_JONGSEONG_BEGIN + 1;

        int code = HANGEUL_SYLLABLE_BEGIN
                + (choIndex * JUNGSEONG_COUNT + jungIndex) * JONGSEONG_COUNT
                + jongIndex;
        this.character = (char) code;
    }

    /**
     * Composes a syllable without a final consonant.
     *
     * @param initialConsonant modern initial consonant
     * @param medialVowel      modern medial vowel
     * @throws OutOfUnicodeRangeException if an argument is not a modern jamo
     *                                    valid for its position
     */
    public HCharacter(char initialConsonant, char medialVowel) throws OutOfUnicodeRangeException {
        this(initialConsonant, medialVowel, FILLER);
    }

    /** @return the wrapped character (syllable or single jamo) */
    public char getChar() {
        return character;
    }

    /** @return the initial consonant as a conjoining jamo, or {@link #FILLER} */
    public char getChoSeong() {
        return initialConsonant;
    }

    /** @return the medial vowel as a conjoining jamo, or {@link #FILLER} */
    public char getJungSeong() {
        return medialVowel;
    }

    /** @return the final consonant as a conjoining jamo, or {@link #FILLER} */
    public char getJongSeong() {
        return finalConsonant;
    }

    /** @return the initial consonant converted with {@link #toCharCompat(char)} */
    public char getChoSeongCompat() {
        return toCharCompat(initialConsonant);
    }

    /** @return the medial vowel converted with {@link #toCharCompat(char)} */
    public char getJungSeongCompat() {
        return toCharCompat(medialVowel);
    }

    /** @return the final consonant converted with {@link #toCharCompat(char)} */
    public char getJongSeongCompat() {
        return toCharCompat(finalConsonant);
    }

    /**
     * Converts a conjoining jamo to the corresponding compatibility jamo.
     *
     * @param ch character to convert
     * @return {@code ch} unchanged if it is not a conjoining jamo (this
     *         includes {@link #FILLER}); the compatibility jamo for a modern
     *         conjoining jamo; otherwise the compatibility HANGUL FILLER (U+3164)
     */
    protected static char toCharCompat(char ch) {
        if (!isJamoLetter(ch)) {
            return ch;
        }
        // Only modern vowels share their ordering with the compatibility block.
        if (inRange(ch, HANGEUL_JAMO_JUNGSEONG_BEGIN, HANGEUL_JAMO_JUNGSEONG_END)) {
            return (char) (ch - HANGEUL_JAMO_JUNGSEONG_BEGIN + HANGEUL_COMPAT_JAMO_VOWEL_BEGIN);
        }
        for (char[] row : CONV_INITIAL_CONSONANTS) {
            if (row[0] == ch) {
                return row[1];
            }
        }
        for (char[] row : CONV_FINAL_CONSONANTS) {
            if (row[0] == ch) {
                return row[1];
            }
        }
        return HANGEUL_COMPAT_JAMO_FILLER;
    }

    /**
     * Extracts the initial consonant of a precomposed syllable.
     *
     * @param character a Hangul syllable; the result is unspecified otherwise
     * @return the initial consonant as a conjoining jamo
     */
    protected static char getChoSeong(char character) {
        int index = (character - HANGEUL_SYLLABLE_BEGIN) / (JONGSEONG_COUNT * JUNGSEONG_COUNT);
        return (char) (HANGEUL_JAMO_CHOSEONG_BEGIN + index);
    }

    /**
     * Extracts the medial vowel of a precomposed syllable.
     *
     * @param character a Hangul syllable; the result is unspecified otherwise
     * @return the medial vowel as a conjoining jamo
     */
    protected static char getJungSeong(char character) {
        int index = (character - HANGEUL_SYLLABLE_BEGIN) / JONGSEONG_COUNT % JUNGSEONG_COUNT;
        return (char) (HANGEUL_JAMO_JUNGSEONG_BEGIN + index);
    }

    /**
     * Extracts the final consonant of a precomposed syllable.
     *
     * @param character a Hangul syllable; the result is unspecified otherwise
     * @return the final consonant as a conjoining jamo, or {@link #FILLER} if
     *         the syllable has none
     */
    protected static char getJongSeong(char character) {
        int index = (character - HANGEUL_SYLLABLE_BEGIN) % JONGSEONG_COUNT - 1;
        if (index == -1) {
            return FILLER; // no final consonant
        }
        return (char) (HANGEUL_JAMO_JONGSEONG_BEGIN + index);
    }

    /** @return the wrapped character as a one-character string */
    @Override
    public String toString() {
        return String.valueOf(character);
    }

    /** @return whether the wrapped character is a conjoining jamo */
    public boolean isJamoLetter() {
        return isJamoLetter(character);
    }

    /**
     * @param ch character to test
     * @return whether {@code ch} lies in the conjoining jamo block (U+1100..U+11FF)
     */
    protected static boolean isJamoLetter(char ch) {
        return inRange(ch, HANGEUL_JAMO_BEGIN, HANGEUL_JAMO_END);
    }

    /** @return whether the wrapped character is a compatibility jamo */
    public boolean isJamoCompatLetter() {
        return isJamoCompatLetter(character);
    }

    /**
     * @param ch character to test
     * @return whether {@code ch} lies in the compatibility jamo range (U+3131..U+318E)
     */
    protected static boolean isJamoCompatLetter(char ch) {
        return inRange(ch, HANGEUL_COMPAT_JAMO_BEGIN, HANGEUL_COMPAT_JAMO_END);
    }

    /** @return whether the wrapped character is a precomposed syllable */
    public boolean isSyllableLetter() {
        return isSyllableLetter(character);
    }

    /**
     * @param ch character to test
     * @return whether {@code ch} is a precomposed Hangul syllable (U+AC00..U+D7A3)
     */
    protected static boolean isSyllableLetter(char ch) {
        return inRange(ch, HANGEUL_SYLLABLE_BEGIN, HANGEUL_SYLLABLE_END);
    }

    /**
     * @param ch character to test
     * @return whether {@code ch} is a vowel jamo (modern or archaic, conjoining
     *         or compatibility)
     */
    protected static boolean isVowelLetter(char ch) {
        return inRange(ch, HANGEUL_JAMO_JUNGSEONG_BEGIN, HANGEUL_JAMO_JUNGSEONG_END)
                || inRange(ch, HANGEUL_COMPAT_JAMO_VOWEL_BEGIN, HANGEUL_COMPAT_JAMO_VOWEL_END)
                || inRange(ch, HANGEUL_JAMO_JUNGSEONG_OLD_BEGIN, HANGEUL_JAMO_JUNGSEONG_OLD_END)
                || inRange(ch, HANGEUL_COMPAT_JAMO_VOWEL_OLD_BEGIN, HANGEUL_COMPAT_JAMO_VOWEL_OLD_END);
    }

    /**
     * @param ch character to test
     * @return whether {@code ch} is a conjoining or compatibility jamo that is
     *         not a vowel
     */
    protected static boolean isConsonantLetter(char ch) {
        if (isJamoLetter(ch) || isJamoCompatLetter(ch)) {
            return !isVowelLetter(ch);
        }
        return false;
    }

    /**
     * @param ch character to test
     * @return whether {@code ch} falls in one of the archaic jamo ranges
     */
    protected static boolean isOldLetter(char ch) {
        return inRange(ch, HANGEUL_JAMO_CHOSEONG_OLD_BEGIN, HANGEUL_JAMO_CHOSEONG_OLD_END)
                || inRange(ch, HANGEUL_JAMO_JUNGSEONG_OLD_BEGIN, HANGEUL_JAMO_JUNGSEONG_OLD_END)
                || inRange(ch, HANGEUL_JAMO_JONGSEONG_OLD_BEGIN, HANGEUL_JAMO_JONGSEONG_OLD_END)
                || inRange(ch, HANGEUL_COMPAT_JAMO_CONSONANT_OLD_BEGIN, HANGEUL_COMPAT_JAMO_VOWEL_OLD_END);
    }

    /** Returns whether {@code begin <= ch <= end}. */
    private static boolean inRange(char ch, char begin, char end) {
        return ch >= begin && ch <= end;
    }

    /**
     * Maps a compatibility jamo to a conjoining jamo using {@code table}.
     * Returns {@code ch} unchanged if it is not a compatibility jamo or has no
     * entry in the table.
     */
    private static char compatToJamo(char ch, char[][] table) {
        if (isJamoCompatLetter(ch)) {
            for (char[] row : table) {
                if (row[1] == ch) {
                    return row[0];
                }
            }
        }
        return ch;
    }

    /** Pairs of {conjoining initial consonant, compatibility jamo}. */
    private static final char[][] CONV_INITIAL_CONSONANTS = new char[][] {
            {HANGUL_CHOSEONG_KIYEOK, '\u3131'},
            {HANGUL_CHOSEONG_SSANGKIYEOK, '\u3132'},
            {HANGUL_CHOSEONG_NIEUN, '\u3134'},
            {HANGUL_CHOSEONG_TIKEUT, '\u3137'},
            {HANGUL_CHOSEONG_SSANGTIKEUT, '\u3138'},
            {HANGUL_CHOSEONG_RIEUL, '\u3139'},
            {HANGUL_CHOSEONG_MIEUM, '\u3141'},
            {HANGUL_CHOSEONG_PIEUP, '\u3142'},
            {HANGUL_CHOSEONG_SSANGPIEUP, '\u3143'},
            {HANGUL_CHOSEONG_SIOS, '\u3145'},
            {HANGUL_CHOSEONG_SSANGSIOS, '\u3146'},
            {HANGUL_CHOSEONG_IEUNG, '\u3147'},
            {HANGUL_CHOSEONG_CIEUC, '\u3148'},
            {HANGUL_CHOSEONG_SSANGCIEUC, '\u3149'},
            {HANGUL_CHOSEONG_CHIEUCH, '\u314A'},
            {HANGUL_CHOSEONG_KHIEUKH, '\u314B'},
            {HANGUL_CHOSEONG_THIEUTH, '\u314C'},
            {HANGUL_CHOSEONG_PHIEUPH, '\u314D'},
            {HANGUL_CHOSEONG_HIEUH, '\u314E'}
    };

    /** Pairs of {conjoining final consonant, compatibility jamo}. */
    private static final char[][] CONV_FINAL_CONSONANTS = new char[][] {
            {HANGUL_JONGSEONG_KIYEOK, '\u3131'},
            {HANGUL_JONGSEONG_SSANGKIYEOK, '\u3132'},
            {HANGUL_JONGSEONG_KIYEOK_SIOS, '\u3133'},
            {HANGUL_JONGSEONG_NIEUN, '\u3134'},
            {HANGUL_JONGSEONG_NIEUN_CIEUC, '\u3135'},
            {HANGUL_JONGSEONG_NIEUN_HIEUH, '\u3136'},
            {HANGUL_JONGSEONG_TIKEUT, '\u3137'},
            {HANGUL_JONGSEONG_RIEUL, '\u3139'},
            {HANGUL_JONGSEONG_RIEUL_KIYEOK, '\u313A'},
            {HANGUL_JONGSEONG_RIEUL_MIEUM, '\u313B'},
            {HANGUL_JONGSEONG_RIEUL_PIEUP, '\u313C'},
            {HANGUL_JONGSEONG_RIEUL_SIOS, '\u313D'},
            {HANGUL_JONGSEONG_RIEUL_THIEUTH, '\u313E'},
            {HANGUL_JONGSEONG_RIEUL_PHIEUPH, '\u313F'},
            {HANGUL_JONGSEONG_RIEUL_HIEUH, '\u3140'},
            {HANGUL_JONGSEONG_MIEUM, '\u3141'},
            {HANGUL_JONGSEONG_PIEUP, '\u3142'},
            {HANGUL_JONGSEONG_PIEUP_SIOS, '\u3144'},
            {HANGUL_JONGSEONG_SIOS, '\u3145'},
            {HANGUL_JONGSEONG_SSANGSIOS, '\u3146'},
            {HANGUL_JONGSEONG_IEUNG, '\u3147'},
            {HANGUL_JONGSEONG_CIEUC, '\u3148'},
            {HANGUL_JONGSEONG_CHIEUCH, '\u314A'},
            {HANGUL_JONGSEONG_KHIEUKH, '\u314B'},
            {HANGUL_JONGSEONG_THIEUTH, '\u314C'},
            {HANGUL_JONGSEONG_PHIEUPH, '\u314D'},
            {HANGUL_JONGSEONG_HIEUH, '\u314E'}
    };

    /**
     * Thrown when a character is outside the Hangul range required by the
     * operation. {@link #toString()} yields the offending code point in
     * lowercase hexadecimal.
     */
    public static class OutOfUnicodeRangeException extends Exception {
        private static final long serialVersionUID = 1L;

        private final char character;

        /**
         * @param c the offending character
         */
        public OutOfUnicodeRangeException(char c) {
            super("Character out of the supported Hangul range: U+"
                    + Integer.toHexString(0x10000 | c).substring(1).toUpperCase());
            this.character = c;
        }

        /** @return the offending code point as a lowercase hexadecimal string */
        @Override
        public String toString() {
            return Integer.toHexString((int) character);
        }
    }

    /**
     * Small usage demonstration.
     *
     * <p>NOTE(review): the character literals of the original example were
     * lost; the values below are illustrative replacements.
     *
     * @param args unused
     * @throws OutOfUnicodeRangeException never for the fixed sample input
     */
    public static void main(String[] args) throws OutOfUnicodeRangeException {
        // Usage 1: decompose the syllable U+D55C into compatibility jamo.
        HCharacter character = new HCharacter('\uD55C');

        System.out.print(character.getChoSeongCompat());
        System.out.print(character.getJungSeongCompat());
        System.out.print(character.getJongSeongCompat());
        System.out.println();

        // Usage 2: compose syllables from compatibility jamo.
        System.out.println(new HCharacter('\u3131', '\u314F'));
        System.out.println(new HCharacter('\u314E', '\u314F', '\u3134'));
    }
}

댓글

이 블로그의 인기 게시물

자바 암호화 확장 (JCE) 관련 자바 1.8.0_151 이후 변경 사항

좌표 변환: 회전 이동

Apache Commons CSV