유니코드 한글 자소 문자 처리
한글 음절 문자를 자소 단위로 분해하거나, 반대로 자소 문자들을 합쳐서 하나의 음절 문자로 만드는 자바 소스 코드이다. 이에 관한 간략한 배경 지식이 필요한 경우, 이 글을 참고하기 바란다.
package kr.pe.elex.hangeul;
/**
 * A single Hangul character that can be decomposed into its jamo (initial consonant,
 * medial vowel, optional final consonant) or composed from them into one syllable.
 *
 * <p>Components are stored as conjoining jamo (U+1100 block). The three-argument
 * constructor also accepts Hangul compatibility jamo (U+3130 block) and converts them.
 * Archaic ("old") jamo have no precomposed syllables and are rejected when composing.
 *
 * <p>Created by Elex on 2014-07-11.
 */
public class HCharacter {
    // Hangul syllables block. Only U+AC00..U+D7A3 are assigned syllables.
    protected static final char HANGEUL_SYLLABLE_BEGIN = '\uAC00';
    protected static final char HANGEUL_SYLLABLE_END = '\uD7AF';
    // Hangul conjoining jamo
    protected static final char HANGEUL_JAMO_BEGIN = '\u1100';
    protected static final char HANGEUL_JAMO_END = '\u11FF';
    protected static final char HANGEUL_JAMO_CHOSEONG_BEGIN = '\u1100';
    protected static final char HANGEUL_JAMO_CHOSEONG_END = '\u1112';
    protected static final char HANGEUL_JAMO_CHOSEONG_OLD_BEGIN = '\u1113';
    protected static final char HANGEUL_JAMO_CHOSEONG_OLD_END = '\u115F';
    protected static final char HANGEUL_JAMO_JUNGSEONG_BEGIN = '\u1161';
    protected static final char HANGEUL_JAMO_JUNGSEONG_END = '\u1175';
    protected static final char HANGEUL_JAMO_JUNGSEONG_OLD_BEGIN = '\u1176';
    protected static final char HANGEUL_JAMO_JUNGSEONG_OLD_END = '\u11A7';
    protected static final char HANGEUL_JAMO_JONGSEONG_BEGIN = '\u11A8';
    protected static final char HANGEUL_JAMO_JONGSEONG_END = '\u11C2';
    protected static final char HANGEUL_JAMO_JONGSEONG_OLD_BEGIN = '\u11C3';
    protected static final char HANGEUL_JAMO_JONGSEONG_OLD_END = '\u11FF';
    // Hangul compatibility jamo
    protected static final char HANGEUL_COMPAT_JAMO_BEGIN = '\u3131';
    protected static final char HANGEUL_COMPAT_JAMO_END = '\u318E';
    protected static final char HANGEUL_COMPAT_JAMO_CONSONANT_BEGIN = '\u3131';
    protected static final char HANGEUL_COMPAT_JAMO_CONSONANT_END = '\u314E';
    protected static final char HANGEUL_COMPAT_JAMO_VOWEL_BEGIN = '\u314F';
    protected static final char HANGEUL_COMPAT_JAMO_VOWEL_END = '\u3163';
    protected static final char HANGEUL_COMPAT_JAMO_CONSONANT_OLD_BEGIN = '\u3165';
    protected static final char HANGEUL_COMPAT_JAMO_CONSONANT_OLD_END = '\u3186';
    protected static final char HANGEUL_COMPAT_JAMO_VOWEL_OLD_BEGIN = '\u3187';
    protected static final char HANGEUL_COMPAT_JAMO_VOWEL_OLD_END = '\u318E';
    // Modern initial consonants (choseong)
    static final char HANGUL_CHOSEONG_KIYEOK = '\u1100';
    static final char HANGUL_CHOSEONG_SSANGKIYEOK = '\u1101';
    static final char HANGUL_CHOSEONG_NIEUN = '\u1102';
    static final char HANGUL_CHOSEONG_TIKEUT = '\u1103';
    static final char HANGUL_CHOSEONG_SSANGTIKEUT = '\u1104';
    static final char HANGUL_CHOSEONG_RIEUL = '\u1105';
    static final char HANGUL_CHOSEONG_MIEUM = '\u1106';
    static final char HANGUL_CHOSEONG_PIEUP = '\u1107';
    static final char HANGUL_CHOSEONG_SSANGPIEUP = '\u1108';
    static final char HANGUL_CHOSEONG_SIOS = '\u1109';
    static final char HANGUL_CHOSEONG_SSANGSIOS = '\u110A';
    static final char HANGUL_CHOSEONG_IEUNG = '\u110B';
    static final char HANGUL_CHOSEONG_CIEUC = '\u110C';
    static final char HANGUL_CHOSEONG_SSANGCIEUC = '\u110D';
    static final char HANGUL_CHOSEONG_CHIEUCH = '\u110E';
    static final char HANGUL_CHOSEONG_KHIEUKH = '\u110F';
    static final char HANGUL_CHOSEONG_THIEUTH = '\u1110';
    static final char HANGUL_CHOSEONG_PHIEUPH = '\u1111';
    static final char HANGUL_CHOSEONG_HIEUH = '\u1112';
    // Modern medial vowels (jungseong)
    static final char HANGUL_JUNGSEONG_A = '\u1161';
    static final char HANGUL_JUNGSEONG_AE = '\u1162';
    static final char HANGUL_JUNGSEONG_YA = '\u1163';
    static final char HANGUL_JUNGSEONG_YAE = '\u1164';
    static final char HANGUL_JUNGSEONG_EO = '\u1165';
    static final char HANGUL_JUNGSEONG_E = '\u1166';
    static final char HANGUL_JUNGSEONG_YEO = '\u1167';
    static final char HANGUL_JUNGSEONG_YE = '\u1168';
    static final char HANGUL_JUNGSEONG_O = '\u1169';
    static final char HANGUL_JUNGSEONG_WA = '\u116A';
    static final char HANGUL_JUNGSEONG_WAE = '\u116B';
    static final char HANGUL_JUNGSEONG_OE = '\u116C';
    static final char HANGUL_JUNGSEONG_YO = '\u116D';
    static final char HANGUL_JUNGSEONG_U = '\u116E';
    static final char HANGUL_JUNGSEONG_WEO = '\u116F';
    static final char HANGUL_JUNGSEONG_WE = '\u1170';
    static final char HANGUL_JUNGSEONG_WI = '\u1171';
    static final char HANGUL_JUNGSEONG_YU = '\u1172';
    static final char HANGUL_JUNGSEONG_EU = '\u1173';
    static final char HANGUL_JUNGSEONG_YI = '\u1174';
    static final char HANGUL_JUNGSEONG_I = '\u1175';
    // Modern final consonants (jongseong)
    static final char HANGUL_JONGSEONG_KIYEOK = '\u11A8';
    static final char HANGUL_JONGSEONG_SSANGKIYEOK = '\u11A9';
    static final char HANGUL_JONGSEONG_KIYEOK_SIOS = '\u11AA';
    static final char HANGUL_JONGSEONG_NIEUN = '\u11AB';
    static final char HANGUL_JONGSEONG_NIEUN_CIEUC = '\u11AC';
    static final char HANGUL_JONGSEONG_NIEUN_HIEUH = '\u11AD';
    static final char HANGUL_JONGSEONG_TIKEUT = '\u11AE';
    static final char HANGUL_JONGSEONG_RIEUL = '\u11AF';
    static final char HANGUL_JONGSEONG_RIEUL_KIYEOK = '\u11B0';
    static final char HANGUL_JONGSEONG_RIEUL_MIEUM = '\u11B1';
    static final char HANGUL_JONGSEONG_RIEUL_PIEUP = '\u11B2';
    static final char HANGUL_JONGSEONG_RIEUL_SIOS = '\u11B3';
    static final char HANGUL_JONGSEONG_RIEUL_THIEUTH = '\u11B4';
    static final char HANGUL_JONGSEONG_RIEUL_PHIEUPH = '\u11B5';
    static final char HANGUL_JONGSEONG_RIEUL_HIEUH = '\u11B6';
    static final char HANGUL_JONGSEONG_MIEUM = '\u11B7';
    static final char HANGUL_JONGSEONG_PIEUP = '\u11B8';
    static final char HANGUL_JONGSEONG_PIEUP_SIOS = '\u11B9';
    static final char HANGUL_JONGSEONG_SIOS = '\u11BA';
    static final char HANGUL_JONGSEONG_SSANGSIOS = '\u11BB';
    static final char HANGUL_JONGSEONG_IEUNG = '\u11BC';
    static final char HANGUL_JONGSEONG_CIEUC = '\u11BD';
    static final char HANGUL_JONGSEONG_CHIEUCH = '\u11BE';
    static final char HANGUL_JONGSEONG_KHIEUKH = '\u11BF';
    static final char HANGUL_JONGSEONG_THIEUTH = '\u11C0';
    static final char HANGUL_JONGSEONG_PHIEUPH = '\u11C1';
    static final char HANGUL_JONGSEONG_HIEUH = '\u11C2';

    /** Marks an absent component (e.g. a syllable without a final consonant). */
    public static final char FILLER = 0;

    /** Last assigned precomposed syllable; the rest of the block is unassigned. */
    private static final char LAST_ASSIGNED_SYLLABLE = '\uD7A3';
    /** Number of modern medial vowels. */
    private static final int JUNGSEONG_COUNT = 21;
    /** Number of final-consonant slots per initial/medial pair, including "no final". */
    private static final int JONGSEONG_COUNT = 28;
    /** HANGUL FILLER in the compatibility block, returned when no mapping exists. */
    private static final char COMPAT_FILLER = '\u3164';

    /** Pairs of {conjoining initial consonant, compatibility consonant}. */
    private static final char[][] CONV_INITIAL_CONSONANTS = new char[][] {
        {HANGUL_CHOSEONG_KIYEOK, '\u3131'},
        {HANGUL_CHOSEONG_SSANGKIYEOK, '\u3132'},
        {HANGUL_CHOSEONG_NIEUN, '\u3134'},
        {HANGUL_CHOSEONG_TIKEUT, '\u3137'},
        {HANGUL_CHOSEONG_SSANGTIKEUT, '\u3138'},
        {HANGUL_CHOSEONG_RIEUL, '\u3139'},
        {HANGUL_CHOSEONG_MIEUM, '\u3141'},
        {HANGUL_CHOSEONG_PIEUP, '\u3142'},
        {HANGUL_CHOSEONG_SSANGPIEUP, '\u3143'},
        {HANGUL_CHOSEONG_SIOS, '\u3145'},
        {HANGUL_CHOSEONG_SSANGSIOS, '\u3146'},
        {HANGUL_CHOSEONG_IEUNG, '\u3147'},
        {HANGUL_CHOSEONG_CIEUC, '\u3148'},
        {HANGUL_CHOSEONG_SSANGCIEUC, '\u3149'},
        {HANGUL_CHOSEONG_CHIEUCH, '\u314A'},
        {HANGUL_CHOSEONG_KHIEUKH, '\u314B'},
        {HANGUL_CHOSEONG_THIEUTH, '\u314C'},
        {HANGUL_CHOSEONG_PHIEUPH, '\u314D'},
        {HANGUL_CHOSEONG_HIEUH, '\u314E'}
    };

    /** Pairs of {conjoining final consonant, compatibility consonant}. */
    private static final char[][] CONV_FINAL_CONSONANTS = new char[][] {
        {HANGUL_JONGSEONG_KIYEOK, '\u3131'},
        {HANGUL_JONGSEONG_SSANGKIYEOK, '\u3132'},
        {HANGUL_JONGSEONG_KIYEOK_SIOS, '\u3133'},
        {HANGUL_JONGSEONG_NIEUN, '\u3134'},
        {HANGUL_JONGSEONG_NIEUN_CIEUC, '\u3135'},
        {HANGUL_JONGSEONG_NIEUN_HIEUH, '\u3136'},
        {HANGUL_JONGSEONG_TIKEUT, '\u3137'},
        {HANGUL_JONGSEONG_RIEUL, '\u3139'},
        {HANGUL_JONGSEONG_RIEUL_KIYEOK, '\u313A'},
        {HANGUL_JONGSEONG_RIEUL_MIEUM, '\u313B'},
        {HANGUL_JONGSEONG_RIEUL_PIEUP, '\u313C'},
        {HANGUL_JONGSEONG_RIEUL_SIOS, '\u313D'},
        {HANGUL_JONGSEONG_RIEUL_THIEUTH, '\u313E'},
        {HANGUL_JONGSEONG_RIEUL_PHIEUPH, '\u313F'},
        {HANGUL_JONGSEONG_RIEUL_HIEUH, '\u3140'},
        {HANGUL_JONGSEONG_MIEUM, '\u3141'},
        {HANGUL_JONGSEONG_PIEUP, '\u3142'},
        {HANGUL_JONGSEONG_PIEUP_SIOS, '\u3144'},
        {HANGUL_JONGSEONG_SIOS, '\u3145'},
        {HANGUL_JONGSEONG_SSANGSIOS, '\u3146'},
        {HANGUL_JONGSEONG_IEUNG, '\u3147'},
        {HANGUL_JONGSEONG_CIEUC, '\u3148'},
        {HANGUL_JONGSEONG_CHIEUCH, '\u314A'},
        // Was U+3148 (CIEUC), which broke both conversion directions for KHIEUKH.
        {HANGUL_JONGSEONG_KHIEUKH, '\u314B'},
        {HANGUL_JONGSEONG_THIEUTH, '\u314C'},
        {HANGUL_JONGSEONG_PHIEUPH, '\u314D'},
        {HANGUL_JONGSEONG_HIEUH, '\u314E'}
    };

    private final char character;
    private final char initialConsonant;
    private final char medialVowel;
    private final char finalConsonant;

    /**
     * Wraps an existing Hangul character.
     *
     * <p>A precomposed syllable is decomposed into its jamo. A lone jamo (conjoining or
     * compatibility) is stored as-is with every component set to {@link #FILLER}.
     *
     * @param character a Hangul syllable, conjoining jamo, or compatibility jamo
     * @throws OutOfUnicodeRangeException if {@code character} is none of those
     */
    public HCharacter(char character) throws OutOfUnicodeRangeException {
        if (isSyllableLetter(character)) {
            this.initialConsonant = getChoSeong(character);
            this.medialVowel = getJungSeong(character);
            this.finalConsonant = getJongSeong(character);
        } else if (isJamoLetter(character) || isJamoCompatLetter(character)) {
            this.initialConsonant = FILLER;
            this.medialVowel = FILLER;
            this.finalConsonant = FILLER;
        } else {
            throw new OutOfUnicodeRangeException(character);
        }
        this.character = character;
    }

    /**
     * Composes a syllable from its jamo.
     *
     * <p>Each argument may be a conjoining jamo or a compatibility jamo; compatibility
     * jamo are converted first. After conversion each component must be a modern jamo
     * of the matching position, because archaic jamo have no precomposed syllable.
     *
     * @param initialConsonant the initial consonant (choseong)
     * @param medialVowel the medial vowel (jungseong)
     * @param finalConsonant the final consonant (jongseong), or {@link #FILLER} for none
     * @throws OutOfUnicodeRangeException carrying the first component that is not a
     *     modern jamo of its position
     */
    public HCharacter(char initialConsonant, char medialVowel, char finalConsonant)
            throws OutOfUnicodeRangeException {
        char cho = compatToJamo(initialConsonant, CONV_INITIAL_CONSONANTS);
        char jung = medialVowel;
        // Only modern compatibility vowels map by offset; anything else is left
        // untouched so the range check below rejects it.
        if (jung >= HANGEUL_COMPAT_JAMO_VOWEL_BEGIN && jung <= HANGEUL_COMPAT_JAMO_VOWEL_END) {
            jung = (char) (jung - HANGEUL_COMPAT_JAMO_VOWEL_BEGIN + HANGEUL_JAMO_JUNGSEONG_BEGIN);
        }
        char jong = compatToJamo(finalConsonant, CONV_FINAL_CONSONANTS);

        if (cho < HANGEUL_JAMO_CHOSEONG_BEGIN || cho > HANGEUL_JAMO_CHOSEONG_END) {
            throw new OutOfUnicodeRangeException(cho);
        }
        if (jung < HANGEUL_JAMO_JUNGSEONG_BEGIN || jung > HANGEUL_JAMO_JUNGSEONG_END) {
            throw new OutOfUnicodeRangeException(jung);
        }
        if (jong != FILLER
                && (jong < HANGEUL_JAMO_JONGSEONG_BEGIN || jong > HANGEUL_JAMO_JONGSEONG_END)) {
            throw new OutOfUnicodeRangeException(jong);
        }

        this.initialConsonant = cho;
        this.medialVowel = jung;
        this.finalConsonant = jong;

        int index = ((cho - HANGEUL_JAMO_CHOSEONG_BEGIN) * JUNGSEONG_COUNT
                + (jung - HANGEUL_JAMO_JUNGSEONG_BEGIN)) * JONGSEONG_COUNT;
        // Test the jamo itself, not its zero-based offset: the first final consonant
        // has offset 0, which would otherwise be mistaken for FILLER and dropped.
        if (jong != FILLER) {
            index += jong - HANGEUL_JAMO_JONGSEONG_BEGIN + 1;
        }
        this.character = (char) (HANGEUL_SYLLABLE_BEGIN + index);
    }

    /**
     * Composes a syllable without a final consonant.
     *
     * @param initialConsonant the initial consonant (choseong)
     * @param medialVowel the medial vowel (jungseong)
     * @throws OutOfUnicodeRangeException if a component is not a modern jamo of its position
     */
    public HCharacter(char initialConsonant, char medialVowel) throws OutOfUnicodeRangeException {
        this(initialConsonant, medialVowel, FILLER);
    }

    /** Returns the wrapped character (a syllable or a lone jamo). */
    public char getChar() {
        return character;
    }

    /** Returns the initial consonant as a conjoining jamo, or {@link #FILLER}. */
    public char getChoSeong() {
        return initialConsonant;
    }

    /** Returns the medial vowel as a conjoining jamo, or {@link #FILLER}. */
    public char getJungSeong() {
        return medialVowel;
    }

    /** Returns the final consonant as a conjoining jamo, or {@link #FILLER}. */
    public char getJongSeong() {
        return finalConsonant;
    }

    /** Returns the initial consonant converted by {@link #toCharCompat(char)}. */
    public char getChoSeongCompat() {
        return toCharCompat(initialConsonant);
    }

    /** Returns the medial vowel converted by {@link #toCharCompat(char)}. */
    public char getJungSeongCompat() {
        return toCharCompat(medialVowel);
    }

    /** Returns the final consonant converted by {@link #toCharCompat(char)}. */
    public char getJongSeongCompat() {
        return toCharCompat(finalConsonant);
    }

    /**
     * Converts a conjoining jamo to its compatibility jamo.
     *
     * @param ch the character to convert
     * @return {@code ch} unchanged if it is not a conjoining jamo (including
     *     {@link #FILLER}); the compatibility jamo for a modern jamo; otherwise the
     *     compatibility HANGUL FILLER (U+3164)
     */
    protected static char toCharCompat(char ch) {
        if (!isJamoLetter(ch)) {
            return ch;
        }
        // Only modern vowels share the ordering of the compatibility block.
        if (ch >= HANGEUL_JAMO_JUNGSEONG_BEGIN && ch <= HANGEUL_JAMO_JUNGSEONG_END) {
            return (char) (ch - HANGEUL_JAMO_JUNGSEONG_BEGIN + HANGEUL_COMPAT_JAMO_VOWEL_BEGIN);
        }
        for (char[] row : CONV_INITIAL_CONSONANTS) {
            if (row[0] == ch) {
                return row[1];
            }
        }
        for (char[] row : CONV_FINAL_CONSONANTS) {
            if (row[0] == ch) {
                return row[1];
            }
        }
        return COMPAT_FILLER;
    }

    /**
     * Extracts the initial consonant of a precomposed syllable.
     *
     * @param character a precomposed Hangul syllable; other input gives a meaningless result
     * @return the initial consonant as a conjoining jamo
     */
    protected static char getChoSeong(char character) {
        int index = (character - HANGEUL_SYLLABLE_BEGIN) / JONGSEONG_COUNT / JUNGSEONG_COUNT;
        return (char) (HANGEUL_JAMO_CHOSEONG_BEGIN + index);
    }

    /**
     * Extracts the medial vowel of a precomposed syllable.
     *
     * @param character a precomposed Hangul syllable; other input gives a meaningless result
     * @return the medial vowel as a conjoining jamo
     */
    protected static char getJungSeong(char character) {
        int index = (character - HANGEUL_SYLLABLE_BEGIN) / JONGSEONG_COUNT % JUNGSEONG_COUNT;
        return (char) (HANGEUL_JAMO_JUNGSEONG_BEGIN + index);
    }

    /**
     * Extracts the final consonant of a precomposed syllable.
     *
     * @param character a precomposed Hangul syllable; other input gives a meaningless result
     * @return the final consonant as a conjoining jamo, or {@link #FILLER} if there is none
     */
    protected static char getJongSeong(char character) {
        int slot = (character - HANGEUL_SYLLABLE_BEGIN) % JONGSEONG_COUNT;
        if (slot == 0) {
            return FILLER; // slot 0 means "no final consonant"
        }
        return (char) (HANGEUL_JAMO_JONGSEONG_BEGIN + slot - 1);
    }

    /** Returns the wrapped character as a one-character string. */
    @Override
    public String toString() {
        return String.valueOf(character);
    }

    /** Returns whether the wrapped character is a conjoining jamo. */
    public boolean isJamoLetter() {
        return isJamoLetter(character);
    }

    /** Returns whether {@code ch} lies in the conjoining jamo block (U+1100..U+11FF). */
    protected static boolean isJamoLetter(char ch) {
        return ch >= HANGEUL_JAMO_BEGIN && ch <= HANGEUL_JAMO_END;
    }

    /** Returns whether the wrapped character is a compatibility jamo. */
    public boolean isJamoCompatLetter() {
        return isJamoCompatLetter(character);
    }

    /** Returns whether {@code ch} lies in the compatibility jamo range (U+3131..U+318E). */
    protected static boolean isJamoCompatLetter(char ch) {
        return ch >= HANGEUL_COMPAT_JAMO_BEGIN && ch <= HANGEUL_COMPAT_JAMO_END;
    }

    /** Returns whether the wrapped character is a precomposed syllable. */
    public boolean isSyllableLetter() {
        return isSyllableLetter(character);
    }

    /**
     * Returns whether {@code ch} is an assigned precomposed syllable (U+AC00..U+D7A3).
     * The unassigned tail of the block is excluded because it cannot be decomposed
     * into modern jamo.
     */
    protected static boolean isSyllableLetter(char ch) {
        return ch >= HANGEUL_SYLLABLE_BEGIN && ch <= LAST_ASSIGNED_SYLLABLE;
    }

    /** Returns whether {@code ch} is a vowel, modern or archaic, in either jamo block. */
    protected static boolean isVowelLetter(char ch) {
        return (ch >= HANGEUL_JAMO_JUNGSEONG_BEGIN && ch <= HANGEUL_JAMO_JUNGSEONG_END)
                || (ch >= HANGEUL_COMPAT_JAMO_VOWEL_BEGIN && ch <= HANGEUL_COMPAT_JAMO_VOWEL_END)
                || (ch >= HANGEUL_JAMO_JUNGSEONG_OLD_BEGIN && ch <= HANGEUL_JAMO_JUNGSEONG_OLD_END)
                || (ch >= HANGEUL_COMPAT_JAMO_VOWEL_OLD_BEGIN
                        && ch <= HANGEUL_COMPAT_JAMO_VOWEL_OLD_END);
    }

    /** Returns whether {@code ch} is a jamo of either block that is not a vowel. */
    protected static boolean isConsonantLetter(char ch) {
        if (isJamoLetter(ch) || isJamoCompatLetter(ch)) {
            return !isVowelLetter(ch);
        }
        return false;
    }

    /** Returns whether {@code ch} is an archaic jamo of either block. */
    protected static boolean isOldLetter(char ch) {
        return (ch >= HANGEUL_JAMO_CHOSEONG_OLD_BEGIN && ch <= HANGEUL_JAMO_CHOSEONG_OLD_END)
                || (ch >= HANGEUL_JAMO_JUNGSEONG_OLD_BEGIN && ch <= HANGEUL_JAMO_JUNGSEONG_OLD_END)
                || (ch >= HANGEUL_JAMO_JONGSEONG_OLD_BEGIN && ch <= HANGEUL_JAMO_JONGSEONG_OLD_END)
                // Archaic compatibility consonants and vowels are contiguous.
                || (ch >= HANGEUL_COMPAT_JAMO_CONSONANT_OLD_BEGIN
                        && ch <= HANGEUL_COMPAT_JAMO_VOWEL_OLD_END);
    }

    /** Maps a compatibility consonant to conjoining form via {@code table}, else returns it as-is. */
    private static char compatToJamo(char ch, char[][] table) {
        if (isJamoCompatLetter(ch)) {
            for (char[] row : table) {
                if (row[1] == ch) {
                    return row[0];
                }
            }
        }
        return ch;
    }

    /**
     * Thrown when a character is outside the Hangul ranges an operation accepts.
     * Declared static so it can be created without an enclosing instance.
     */
    public static class OutOfUnicodeRangeException extends Exception {
        private final char character;

        /**
         * @param c the offending character
         */
        public OutOfUnicodeRangeException(char c) {
            super("Character out of supported Hangul range: 0x" + Integer.toHexString((int) c));
            this.character = c;
        }

        /** Returns the offending character's code point as lowercase hexadecimal. */
        @Override
        public String toString() {
            return Integer.toHexString((int) character);
        }
    }

    /** Demonstrates decomposition and composition. */
    public static void main(String[] args) throws OutOfUnicodeRangeException {
        // Usage 1: decompose the syllable HO (U+D638) and print its compatibility jamo.
        HCharacter character = new HCharacter('\uD638');
        System.out.print(character.getChoSeongCompat());
        System.out.print(character.getJungSeongCompat());
        System.out.print(character.getJongSeongCompat());
        System.out.println();
        // Usage 2: compose syllables from compatibility jamo.
        // IEUNG + A
        System.out.println(new HCharacter('\u3147', '\u314F'));
        // THIEUTH + OE + RIEUL-KIYEOK
        System.out.println(new HCharacter('\u314C', '\u315A', '\u313A'));
    }
}
/**
 * A single Hangul character that can be decomposed into its jamo (initial consonant,
 * medial vowel, optional final consonant) or composed from them into one syllable.
 *
 * <p>Components are stored as conjoining jamo (U+1100 block). The three-argument
 * constructor also accepts Hangul compatibility jamo (U+3130 block) and converts them.
 * Archaic ("old") jamo have no precomposed syllables and are rejected when composing.
 *
 * <p>Created by Elex on 2014-07-11.
 */
public class HCharacter {
    // Hangul syllables block. Only U+AC00..U+D7A3 are assigned syllables.
    protected static final char HANGEUL_SYLLABLE_BEGIN = '\uAC00';
    protected static final char HANGEUL_SYLLABLE_END = '\uD7AF';
    // Hangul conjoining jamo
    protected static final char HANGEUL_JAMO_BEGIN = '\u1100';
    protected static final char HANGEUL_JAMO_END = '\u11FF';
    protected static final char HANGEUL_JAMO_CHOSEONG_BEGIN = '\u1100';
    protected static final char HANGEUL_JAMO_CHOSEONG_END = '\u1112';
    protected static final char HANGEUL_JAMO_CHOSEONG_OLD_BEGIN = '\u1113';
    protected static final char HANGEUL_JAMO_CHOSEONG_OLD_END = '\u115F';
    protected static final char HANGEUL_JAMO_JUNGSEONG_BEGIN = '\u1161';
    protected static final char HANGEUL_JAMO_JUNGSEONG_END = '\u1175';
    protected static final char HANGEUL_JAMO_JUNGSEONG_OLD_BEGIN = '\u1176';
    protected static final char HANGEUL_JAMO_JUNGSEONG_OLD_END = '\u11A7';
    protected static final char HANGEUL_JAMO_JONGSEONG_BEGIN = '\u11A8';
    protected static final char HANGEUL_JAMO_JONGSEONG_END = '\u11C2';
    protected static final char HANGEUL_JAMO_JONGSEONG_OLD_BEGIN = '\u11C3';
    protected static final char HANGEUL_JAMO_JONGSEONG_OLD_END = '\u11FF';
    // Hangul compatibility jamo
    protected static final char HANGEUL_COMPAT_JAMO_BEGIN = '\u3131';
    protected static final char HANGEUL_COMPAT_JAMO_END = '\u318E';
    protected static final char HANGEUL_COMPAT_JAMO_CONSONANT_BEGIN = '\u3131';
    protected static final char HANGEUL_COMPAT_JAMO_CONSONANT_END = '\u314E';
    protected static final char HANGEUL_COMPAT_JAMO_VOWEL_BEGIN = '\u314F';
    protected static final char HANGEUL_COMPAT_JAMO_VOWEL_END = '\u3163';
    protected static final char HANGEUL_COMPAT_JAMO_CONSONANT_OLD_BEGIN = '\u3165';
    protected static final char HANGEUL_COMPAT_JAMO_CONSONANT_OLD_END = '\u3186';
    protected static final char HANGEUL_COMPAT_JAMO_VOWEL_OLD_BEGIN = '\u3187';
    protected static final char HANGEUL_COMPAT_JAMO_VOWEL_OLD_END = '\u318E';
    // Modern initial consonants (choseong)
    static final char HANGUL_CHOSEONG_KIYEOK = '\u1100';
    static final char HANGUL_CHOSEONG_SSANGKIYEOK = '\u1101';
    static final char HANGUL_CHOSEONG_NIEUN = '\u1102';
    static final char HANGUL_CHOSEONG_TIKEUT = '\u1103';
    static final char HANGUL_CHOSEONG_SSANGTIKEUT = '\u1104';
    static final char HANGUL_CHOSEONG_RIEUL = '\u1105';
    static final char HANGUL_CHOSEONG_MIEUM = '\u1106';
    static final char HANGUL_CHOSEONG_PIEUP = '\u1107';
    static final char HANGUL_CHOSEONG_SSANGPIEUP = '\u1108';
    static final char HANGUL_CHOSEONG_SIOS = '\u1109';
    static final char HANGUL_CHOSEONG_SSANGSIOS = '\u110A';
    static final char HANGUL_CHOSEONG_IEUNG = '\u110B';
    static final char HANGUL_CHOSEONG_CIEUC = '\u110C';
    static final char HANGUL_CHOSEONG_SSANGCIEUC = '\u110D';
    static final char HANGUL_CHOSEONG_CHIEUCH = '\u110E';
    static final char HANGUL_CHOSEONG_KHIEUKH = '\u110F';
    static final char HANGUL_CHOSEONG_THIEUTH = '\u1110';
    static final char HANGUL_CHOSEONG_PHIEUPH = '\u1111';
    static final char HANGUL_CHOSEONG_HIEUH = '\u1112';
    // Modern medial vowels (jungseong)
    static final char HANGUL_JUNGSEONG_A = '\u1161';
    static final char HANGUL_JUNGSEONG_AE = '\u1162';
    static final char HANGUL_JUNGSEONG_YA = '\u1163';
    static final char HANGUL_JUNGSEONG_YAE = '\u1164';
    static final char HANGUL_JUNGSEONG_EO = '\u1165';
    static final char HANGUL_JUNGSEONG_E = '\u1166';
    static final char HANGUL_JUNGSEONG_YEO = '\u1167';
    static final char HANGUL_JUNGSEONG_YE = '\u1168';
    static final char HANGUL_JUNGSEONG_O = '\u1169';
    static final char HANGUL_JUNGSEONG_WA = '\u116A';
    static final char HANGUL_JUNGSEONG_WAE = '\u116B';
    static final char HANGUL_JUNGSEONG_OE = '\u116C';
    static final char HANGUL_JUNGSEONG_YO = '\u116D';
    static final char HANGUL_JUNGSEONG_U = '\u116E';
    static final char HANGUL_JUNGSEONG_WEO = '\u116F';
    static final char HANGUL_JUNGSEONG_WE = '\u1170';
    static final char HANGUL_JUNGSEONG_WI = '\u1171';
    static final char HANGUL_JUNGSEONG_YU = '\u1172';
    static final char HANGUL_JUNGSEONG_EU = '\u1173';
    static final char HANGUL_JUNGSEONG_YI = '\u1174';
    static final char HANGUL_JUNGSEONG_I = '\u1175';
    // Modern final consonants (jongseong)
    static final char HANGUL_JONGSEONG_KIYEOK = '\u11A8';
    static final char HANGUL_JONGSEONG_SSANGKIYEOK = '\u11A9';
    static final char HANGUL_JONGSEONG_KIYEOK_SIOS = '\u11AA';
    static final char HANGUL_JONGSEONG_NIEUN = '\u11AB';
    static final char HANGUL_JONGSEONG_NIEUN_CIEUC = '\u11AC';
    static final char HANGUL_JONGSEONG_NIEUN_HIEUH = '\u11AD';
    static final char HANGUL_JONGSEONG_TIKEUT = '\u11AE';
    static final char HANGUL_JONGSEONG_RIEUL = '\u11AF';
    static final char HANGUL_JONGSEONG_RIEUL_KIYEOK = '\u11B0';
    static final char HANGUL_JONGSEONG_RIEUL_MIEUM = '\u11B1';
    static final char HANGUL_JONGSEONG_RIEUL_PIEUP = '\u11B2';
    static final char HANGUL_JONGSEONG_RIEUL_SIOS = '\u11B3';
    static final char HANGUL_JONGSEONG_RIEUL_THIEUTH = '\u11B4';
    static final char HANGUL_JONGSEONG_RIEUL_PHIEUPH = '\u11B5';
    static final char HANGUL_JONGSEONG_RIEUL_HIEUH = '\u11B6';
    static final char HANGUL_JONGSEONG_MIEUM = '\u11B7';
    static final char HANGUL_JONGSEONG_PIEUP = '\u11B8';
    static final char HANGUL_JONGSEONG_PIEUP_SIOS = '\u11B9';
    static final char HANGUL_JONGSEONG_SIOS = '\u11BA';
    static final char HANGUL_JONGSEONG_SSANGSIOS = '\u11BB';
    static final char HANGUL_JONGSEONG_IEUNG = '\u11BC';
    static final char HANGUL_JONGSEONG_CIEUC = '\u11BD';
    static final char HANGUL_JONGSEONG_CHIEUCH = '\u11BE';
    static final char HANGUL_JONGSEONG_KHIEUKH = '\u11BF';
    static final char HANGUL_JONGSEONG_THIEUTH = '\u11C0';
    static final char HANGUL_JONGSEONG_PHIEUPH = '\u11C1';
    static final char HANGUL_JONGSEONG_HIEUH = '\u11C2';

    /** Marks an absent component (e.g. a syllable without a final consonant). */
    public static final char FILLER = 0;

    /** Last assigned precomposed syllable; the rest of the block is unassigned. */
    private static final char LAST_ASSIGNED_SYLLABLE = '\uD7A3';
    /** Number of modern medial vowels. */
    private static final int JUNGSEONG_COUNT = 21;
    /** Number of final-consonant slots per initial/medial pair, including "no final". */
    private static final int JONGSEONG_COUNT = 28;
    /** HANGUL FILLER in the compatibility block, returned when no mapping exists. */
    private static final char COMPAT_FILLER = '\u3164';

    /** Pairs of {conjoining initial consonant, compatibility consonant}. */
    private static final char[][] CONV_INITIAL_CONSONANTS = new char[][] {
        {HANGUL_CHOSEONG_KIYEOK, '\u3131'},
        {HANGUL_CHOSEONG_SSANGKIYEOK, '\u3132'},
        {HANGUL_CHOSEONG_NIEUN, '\u3134'},
        {HANGUL_CHOSEONG_TIKEUT, '\u3137'},
        {HANGUL_CHOSEONG_SSANGTIKEUT, '\u3138'},
        {HANGUL_CHOSEONG_RIEUL, '\u3139'},
        {HANGUL_CHOSEONG_MIEUM, '\u3141'},
        {HANGUL_CHOSEONG_PIEUP, '\u3142'},
        {HANGUL_CHOSEONG_SSANGPIEUP, '\u3143'},
        {HANGUL_CHOSEONG_SIOS, '\u3145'},
        {HANGUL_CHOSEONG_SSANGSIOS, '\u3146'},
        {HANGUL_CHOSEONG_IEUNG, '\u3147'},
        {HANGUL_CHOSEONG_CIEUC, '\u3148'},
        {HANGUL_CHOSEONG_SSANGCIEUC, '\u3149'},
        {HANGUL_CHOSEONG_CHIEUCH, '\u314A'},
        {HANGUL_CHOSEONG_KHIEUKH, '\u314B'},
        {HANGUL_CHOSEONG_THIEUTH, '\u314C'},
        {HANGUL_CHOSEONG_PHIEUPH, '\u314D'},
        {HANGUL_CHOSEONG_HIEUH, '\u314E'}
    };

    /** Pairs of {conjoining final consonant, compatibility consonant}. */
    private static final char[][] CONV_FINAL_CONSONANTS = new char[][] {
        {HANGUL_JONGSEONG_KIYEOK, '\u3131'},
        {HANGUL_JONGSEONG_SSANGKIYEOK, '\u3132'},
        {HANGUL_JONGSEONG_KIYEOK_SIOS, '\u3133'},
        {HANGUL_JONGSEONG_NIEUN, '\u3134'},
        {HANGUL_JONGSEONG_NIEUN_CIEUC, '\u3135'},
        {HANGUL_JONGSEONG_NIEUN_HIEUH, '\u3136'},
        {HANGUL_JONGSEONG_TIKEUT, '\u3137'},
        {HANGUL_JONGSEONG_RIEUL, '\u3139'},
        {HANGUL_JONGSEONG_RIEUL_KIYEOK, '\u313A'},
        {HANGUL_JONGSEONG_RIEUL_MIEUM, '\u313B'},
        {HANGUL_JONGSEONG_RIEUL_PIEUP, '\u313C'},
        {HANGUL_JONGSEONG_RIEUL_SIOS, '\u313D'},
        {HANGUL_JONGSEONG_RIEUL_THIEUTH, '\u313E'},
        {HANGUL_JONGSEONG_RIEUL_PHIEUPH, '\u313F'},
        {HANGUL_JONGSEONG_RIEUL_HIEUH, '\u3140'},
        {HANGUL_JONGSEONG_MIEUM, '\u3141'},
        {HANGUL_JONGSEONG_PIEUP, '\u3142'},
        {HANGUL_JONGSEONG_PIEUP_SIOS, '\u3144'},
        {HANGUL_JONGSEONG_SIOS, '\u3145'},
        {HANGUL_JONGSEONG_SSANGSIOS, '\u3146'},
        {HANGUL_JONGSEONG_IEUNG, '\u3147'},
        {HANGUL_JONGSEONG_CIEUC, '\u3148'},
        {HANGUL_JONGSEONG_CHIEUCH, '\u314A'},
        // Was U+3148 (CIEUC), which broke both conversion directions for KHIEUKH.
        {HANGUL_JONGSEONG_KHIEUKH, '\u314B'},
        {HANGUL_JONGSEONG_THIEUTH, '\u314C'},
        {HANGUL_JONGSEONG_PHIEUPH, '\u314D'},
        {HANGUL_JONGSEONG_HIEUH, '\u314E'}
    };

    private final char character;
    private final char initialConsonant;
    private final char medialVowel;
    private final char finalConsonant;

    /**
     * Wraps an existing Hangul character.
     *
     * <p>A precomposed syllable is decomposed into its jamo. A lone jamo (conjoining or
     * compatibility) is stored as-is with every component set to {@link #FILLER}.
     *
     * @param character a Hangul syllable, conjoining jamo, or compatibility jamo
     * @throws OutOfUnicodeRangeException if {@code character} is none of those
     */
    public HCharacter(char character) throws OutOfUnicodeRangeException {
        if (isSyllableLetter(character)) {
            this.initialConsonant = getChoSeong(character);
            this.medialVowel = getJungSeong(character);
            this.finalConsonant = getJongSeong(character);
        } else if (isJamoLetter(character) || isJamoCompatLetter(character)) {
            this.initialConsonant = FILLER;
            this.medialVowel = FILLER;
            this.finalConsonant = FILLER;
        } else {
            throw new OutOfUnicodeRangeException(character);
        }
        this.character = character;
    }

    /**
     * Composes a syllable from its jamo.
     *
     * <p>Each argument may be a conjoining jamo or a compatibility jamo; compatibility
     * jamo are converted first. After conversion each component must be a modern jamo
     * of the matching position, because archaic jamo have no precomposed syllable.
     *
     * @param initialConsonant the initial consonant (choseong)
     * @param medialVowel the medial vowel (jungseong)
     * @param finalConsonant the final consonant (jongseong), or {@link #FILLER} for none
     * @throws OutOfUnicodeRangeException carrying the first component that is not a
     *     modern jamo of its position
     */
    public HCharacter(char initialConsonant, char medialVowel, char finalConsonant)
            throws OutOfUnicodeRangeException {
        char cho = compatToJamo(initialConsonant, CONV_INITIAL_CONSONANTS);
        char jung = medialVowel;
        // Only modern compatibility vowels map by offset; anything else is left
        // untouched so the range check below rejects it.
        if (jung >= HANGEUL_COMPAT_JAMO_VOWEL_BEGIN && jung <= HANGEUL_COMPAT_JAMO_VOWEL_END) {
            jung = (char) (jung - HANGEUL_COMPAT_JAMO_VOWEL_BEGIN + HANGEUL_JAMO_JUNGSEONG_BEGIN);
        }
        char jong = compatToJamo(finalConsonant, CONV_FINAL_CONSONANTS);

        if (cho < HANGEUL_JAMO_CHOSEONG_BEGIN || cho > HANGEUL_JAMO_CHOSEONG_END) {
            throw new OutOfUnicodeRangeException(cho);
        }
        if (jung < HANGEUL_JAMO_JUNGSEONG_BEGIN || jung > HANGEUL_JAMO_JUNGSEONG_END) {
            throw new OutOfUnicodeRangeException(jung);
        }
        if (jong != FILLER
                && (jong < HANGEUL_JAMO_JONGSEONG_BEGIN || jong > HANGEUL_JAMO_JONGSEONG_END)) {
            throw new OutOfUnicodeRangeException(jong);
        }

        this.initialConsonant = cho;
        this.medialVowel = jung;
        this.finalConsonant = jong;

        int index = ((cho - HANGEUL_JAMO_CHOSEONG_BEGIN) * JUNGSEONG_COUNT
                + (jung - HANGEUL_JAMO_JUNGSEONG_BEGIN)) * JONGSEONG_COUNT;
        // Test the jamo itself, not its zero-based offset: the first final consonant
        // has offset 0, which would otherwise be mistaken for FILLER and dropped.
        if (jong != FILLER) {
            index += jong - HANGEUL_JAMO_JONGSEONG_BEGIN + 1;
        }
        this.character = (char) (HANGEUL_SYLLABLE_BEGIN + index);
    }

    /**
     * Composes a syllable without a final consonant.
     *
     * @param initialConsonant the initial consonant (choseong)
     * @param medialVowel the medial vowel (jungseong)
     * @throws OutOfUnicodeRangeException if a component is not a modern jamo of its position
     */
    public HCharacter(char initialConsonant, char medialVowel) throws OutOfUnicodeRangeException {
        this(initialConsonant, medialVowel, FILLER);
    }

    /** Returns the wrapped character (a syllable or a lone jamo). */
    public char getChar() {
        return character;
    }

    /** Returns the initial consonant as a conjoining jamo, or {@link #FILLER}. */
    public char getChoSeong() {
        return initialConsonant;
    }

    /** Returns the medial vowel as a conjoining jamo, or {@link #FILLER}. */
    public char getJungSeong() {
        return medialVowel;
    }

    /** Returns the final consonant as a conjoining jamo, or {@link #FILLER}. */
    public char getJongSeong() {
        return finalConsonant;
    }

    /** Returns the initial consonant converted by {@link #toCharCompat(char)}. */
    public char getChoSeongCompat() {
        return toCharCompat(initialConsonant);
    }

    /** Returns the medial vowel converted by {@link #toCharCompat(char)}. */
    public char getJungSeongCompat() {
        return toCharCompat(medialVowel);
    }

    /** Returns the final consonant converted by {@link #toCharCompat(char)}. */
    public char getJongSeongCompat() {
        return toCharCompat(finalConsonant);
    }

    /**
     * Converts a conjoining jamo to its compatibility jamo.
     *
     * @param ch the character to convert
     * @return {@code ch} unchanged if it is not a conjoining jamo (including
     *     {@link #FILLER}); the compatibility jamo for a modern jamo; otherwise the
     *     compatibility HANGUL FILLER (U+3164)
     */
    protected static char toCharCompat(char ch) {
        if (!isJamoLetter(ch)) {
            return ch;
        }
        // Only modern vowels share the ordering of the compatibility block.
        if (ch >= HANGEUL_JAMO_JUNGSEONG_BEGIN && ch <= HANGEUL_JAMO_JUNGSEONG_END) {
            return (char) (ch - HANGEUL_JAMO_JUNGSEONG_BEGIN + HANGEUL_COMPAT_JAMO_VOWEL_BEGIN);
        }
        for (char[] row : CONV_INITIAL_CONSONANTS) {
            if (row[0] == ch) {
                return row[1];
            }
        }
        for (char[] row : CONV_FINAL_CONSONANTS) {
            if (row[0] == ch) {
                return row[1];
            }
        }
        return COMPAT_FILLER;
    }

    /**
     * Extracts the initial consonant of a precomposed syllable.
     *
     * @param character a precomposed Hangul syllable; other input gives a meaningless result
     * @return the initial consonant as a conjoining jamo
     */
    protected static char getChoSeong(char character) {
        int index = (character - HANGEUL_SYLLABLE_BEGIN) / JONGSEONG_COUNT / JUNGSEONG_COUNT;
        return (char) (HANGEUL_JAMO_CHOSEONG_BEGIN + index);
    }

    /**
     * Extracts the medial vowel of a precomposed syllable.
     *
     * @param character a precomposed Hangul syllable; other input gives a meaningless result
     * @return the medial vowel as a conjoining jamo
     */
    protected static char getJungSeong(char character) {
        int index = (character - HANGEUL_SYLLABLE_BEGIN) / JONGSEONG_COUNT % JUNGSEONG_COUNT;
        return (char) (HANGEUL_JAMO_JUNGSEONG_BEGIN + index);
    }

    /**
     * Extracts the final consonant of a precomposed syllable.
     *
     * @param character a precomposed Hangul syllable; other input gives a meaningless result
     * @return the final consonant as a conjoining jamo, or {@link #FILLER} if there is none
     */
    protected static char getJongSeong(char character) {
        int slot = (character - HANGEUL_SYLLABLE_BEGIN) % JONGSEONG_COUNT;
        if (slot == 0) {
            return FILLER; // slot 0 means "no final consonant"
        }
        return (char) (HANGEUL_JAMO_JONGSEONG_BEGIN + slot - 1);
    }

    /** Returns the wrapped character as a one-character string. */
    @Override
    public String toString() {
        return String.valueOf(character);
    }

    /** Returns whether the wrapped character is a conjoining jamo. */
    public boolean isJamoLetter() {
        return isJamoLetter(character);
    }

    /** Returns whether {@code ch} lies in the conjoining jamo block (U+1100..U+11FF). */
    protected static boolean isJamoLetter(char ch) {
        return ch >= HANGEUL_JAMO_BEGIN && ch <= HANGEUL_JAMO_END;
    }

    /** Returns whether the wrapped character is a compatibility jamo. */
    public boolean isJamoCompatLetter() {
        return isJamoCompatLetter(character);
    }

    /** Returns whether {@code ch} lies in the compatibility jamo range (U+3131..U+318E). */
    protected static boolean isJamoCompatLetter(char ch) {
        return ch >= HANGEUL_COMPAT_JAMO_BEGIN && ch <= HANGEUL_COMPAT_JAMO_END;
    }

    /** Returns whether the wrapped character is a precomposed syllable. */
    public boolean isSyllableLetter() {
        return isSyllableLetter(character);
    }

    /**
     * Returns whether {@code ch} is an assigned precomposed syllable (U+AC00..U+D7A3).
     * The unassigned tail of the block is excluded because it cannot be decomposed
     * into modern jamo.
     */
    protected static boolean isSyllableLetter(char ch) {
        return ch >= HANGEUL_SYLLABLE_BEGIN && ch <= LAST_ASSIGNED_SYLLABLE;
    }

    /** Returns whether {@code ch} is a vowel, modern or archaic, in either jamo block. */
    protected static boolean isVowelLetter(char ch) {
        return (ch >= HANGEUL_JAMO_JUNGSEONG_BEGIN && ch <= HANGEUL_JAMO_JUNGSEONG_END)
                || (ch >= HANGEUL_COMPAT_JAMO_VOWEL_BEGIN && ch <= HANGEUL_COMPAT_JAMO_VOWEL_END)
                || (ch >= HANGEUL_JAMO_JUNGSEONG_OLD_BEGIN && ch <= HANGEUL_JAMO_JUNGSEONG_OLD_END)
                || (ch >= HANGEUL_COMPAT_JAMO_VOWEL_OLD_BEGIN
                        && ch <= HANGEUL_COMPAT_JAMO_VOWEL_OLD_END);
    }

    /** Returns whether {@code ch} is a jamo of either block that is not a vowel. */
    protected static boolean isConsonantLetter(char ch) {
        if (isJamoLetter(ch) || isJamoCompatLetter(ch)) {
            return !isVowelLetter(ch);
        }
        return false;
    }

    /** Returns whether {@code ch} is an archaic jamo of either block. */
    protected static boolean isOldLetter(char ch) {
        return (ch >= HANGEUL_JAMO_CHOSEONG_OLD_BEGIN && ch <= HANGEUL_JAMO_CHOSEONG_OLD_END)
                || (ch >= HANGEUL_JAMO_JUNGSEONG_OLD_BEGIN && ch <= HANGEUL_JAMO_JUNGSEONG_OLD_END)
                || (ch >= HANGEUL_JAMO_JONGSEONG_OLD_BEGIN && ch <= HANGEUL_JAMO_JONGSEONG_OLD_END)
                // Archaic compatibility consonants and vowels are contiguous.
                || (ch >= HANGEUL_COMPAT_JAMO_CONSONANT_OLD_BEGIN
                        && ch <= HANGEUL_COMPAT_JAMO_VOWEL_OLD_END);
    }

    /** Maps a compatibility consonant to conjoining form via {@code table}, else returns it as-is. */
    private static char compatToJamo(char ch, char[][] table) {
        if (isJamoCompatLetter(ch)) {
            for (char[] row : table) {
                if (row[1] == ch) {
                    return row[0];
                }
            }
        }
        return ch;
    }

    /**
     * Thrown when a character is outside the Hangul ranges an operation accepts.
     * Declared static so it can be created without an enclosing instance.
     */
    public static class OutOfUnicodeRangeException extends Exception {
        private final char character;

        /**
         * @param c the offending character
         */
        public OutOfUnicodeRangeException(char c) {
            super("Character out of supported Hangul range: 0x" + Integer.toHexString((int) c));
            this.character = c;
        }

        /** Returns the offending character's code point as lowercase hexadecimal. */
        @Override
        public String toString() {
            return Integer.toHexString((int) character);
        }
    }

    /** Demonstrates decomposition and composition. */
    public static void main(String[] args) throws OutOfUnicodeRangeException {
        // Usage 1: decompose the syllable HO (U+D638) and print its compatibility jamo.
        HCharacter character = new HCharacter('\uD638');
        System.out.print(character.getChoSeongCompat());
        System.out.print(character.getJungSeongCompat());
        System.out.print(character.getJongSeongCompat());
        System.out.println();
        // Usage 2: compose syllables from compatibility jamo.
        // IEUNG + A
        System.out.println(new HCharacter('\u3147', '\u314F'));
        // THIEUTH + OE + RIEUL-KIYEOK
        System.out.println(new HCharacter('\u314C', '\u315A', '\u313A'));
    }
}
댓글
댓글 쓰기