安卓汉字转拼音

Home / Android MrLee 2014-8-5 4366

原来研究的小项目,现在简单整理了一下,免费贡献给大家!

20140805125728


package com.tool.hz2py;
import android.os.Bundle;
import android.app.Activity;
import android.view.Menu;
import android.widget.TextView;
/**
 * Demo activity: converts a fixed Chinese string to pinyin through the native
 * {@link Hz2py} bridge and displays the result in the layout's text view.
 */
public class MainActivity extends Activity {
	/** Native converter, created in {@link #onCreate(Bundle)}. */
	protected Hz2py hz2py;

	@Override
	protected void onCreate(Bundle savedInstanceState) {
		super.onCreate(savedInstanceState);
		setContentView(R.layout.activity_main);

		hz2py = new Hz2py();

		// Look up the output widget, then show the converted sample text in it.
		final TextView output = (TextView) findViewById(R.id.text);
		final String pinyin = hz2py.hz2py("汉字转拼音");
		output.setText(pinyin);
	}

	@Override
	public boolean onCreateOptionsMenu(Menu menu) {
		// Inflate the menu; this adds items to the action bar if it is present.
		getMenuInflater().inflate(R.menu.main, menu);
		return true;
	}
}

JNI 类:Hz2py
package com.tool.hz2py;
/**
 * JNI bridge to the native {@code Hz2py} library.
 *
 * <p>The library is loaded once, when this class is initialised.
 */
public class Hz2py {

	static {
		System.loadLibrary("Hz2py");
	}

	/**
	 * Converts the given text using the native implementation.
	 *
	 * @param text the text to convert
	 * @return the string produced by the native code
	 */
	public native String hz2py(String text);
}

下面是C++头文件和代码
/* DO NOT EDIT THIS FILE - it is machine generated */
#include
/* Header for class com_tool_hz2py_Hz2py */
#ifndef _Included_com_tool_hz2py_Hz2py
#define _Included_com_tool_hz2py_Hz2py
#ifdef __cplusplus
extern "C" {
#endif
/*
* Class: com_tool_hz2py_Hz2py
* Method: hz2py
* Signature: (Ljava/lang/String;)Ljava/lang/String;
*/
JNIEXPORT jstring JNICALL Java_com_tool_hz2py_Hz2py_hz2py
(JNIEnv *, jobject, jstring);
#ifdef __cplusplus
}
#endif
#endif

#include "hz2py.h"
#include <string.h>
#include "com_tool_hz2py_Hz2py.h"
/* Upper bound on the number of leading bytes is_utf8_string() inspects. */
#define HZ2PY_UTF8_CHECK_LENGTH 20
/* Not referenced by the code in this listing. */
#define HZ2PY_FILE_READ_BUF_ARRAY_SIZE 1
/* Size in bytes of the local input copy (inbuf) that hztpy() makes. */
#define HZ2PY_INPUT_BUF_ARRAY_SIZE 1024
/* Not referenced by the code in this listing. */
#define HZ2PY_OUTPUT_BUF_ARRAY_SIZE 2048
/*
 * HZ2PY_STR_COPY(to, from, count)
 *
 * Copies exactly `count` bytes from `from` to `to`, advancing both pointers,
 * provided that none of those `count` bytes is the terminating NUL.
 *
 * If the input ends before `count` bytes are available (a truncated
 * multi-byte sequence), nothing is written to `to`. Instead the remaining
 * bytes up to the NUL are appended to `overage_buff` when it is non-NULL,
 * and the macro then `break`s out of the enclosing loop.
 *
 * Fix: `overage_buff` is now advanced after every stored byte. Previously
 * each leftover byte overwrote the same slot, so only the last one survived.
 *
 * Requirements on the expansion site:
 *   - it must be inside a loop (the macro ends with a bare `break`);
 *   - `int ok`, `int i`, `char *_tmp` and `char *overage_buff` must be in
 *     scope; the first three are used as scratch and overage_buff is advanced.
 *
 * NOTE(review): when overage_buff is NULL the leftover bytes are dropped,
 * although the comment on utf8_to_pinyin says they should go to the output.
 * That behaviour is left unchanged here - confirm the intended contract.
 */
#define HZ2PY_STR_COPY(to, from, count) \
    ok = 1;\
    i = 0;\
    _tmp = from;\
    while(i < count)\
    {\
        if (*_tmp == '\0')\
        {\
            ok = 0;\
            break;\
        }\
        _tmp ++;\
        i ++;\
    }\
    if (ok)\
    {\
        i = 0;\
        while(i < count)\
        {\
            *to = *from;\
            to ++;\
            from ++;\
            i ++;\
        }\
    }\
    else\
    {\
        if (overage_buff != NULL)\
        {\
            while(*from != '\0')\
            {\
                *overage_buff = *from;\
                overage_buff ++;\
                from ++;\
            }\
        }\
        break;\
    }
// Converts the Chinese characters of a UTF-8 string to pinyin.
//
// in                  NUL-terminated UTF-8 input.
// out                 Destination buffer. No bounds checking is done and no
//                     terminating NUL is written, so the caller must pass a
//                     zero-filled buffer that is large enough.
// first_letter_only   1: emit only the first letter of each reading.
// polyphone_support   0: keep only the first reading of a character;
//                     otherwise the table entry is emitted in full.
// add_blank           non-zero: append the '#' separator after each converted
//                     Chinese character.
// convert_double_char non-zero: map full-width forms U+FF01..U+FF5E and the
//                     ideographic space U+3000 to their ASCII counterparts.
// overage_buff        receives trailing bytes that cannot form a complete
//                     UTF-8 character. May be NULL; a stray lead byte is then
//                     copied to out instead.
//
// Fix: the published listing had lost every '|' character, so the code point
// assembly (bitwise OR) and the reading-separator tests did not compile. They
// are restored here.
// NOTE(review): this assumes _pinyin_table_ entries separate alternative
// readings with '|' - confirm against hz2py.h.
void utf8_to_pinyin(char *in, char *out, int first_letter_only,
                    int polyphone_support, int add_blank, int convert_double_char,
                    char *overage_buff) {
	// i, ok and _tmp are scratch variables required by HZ2PY_STR_COPY.
	int i = 0;
	char *utf = in;
	char *_tmp;
	char *_tmp2;
	char py_tmp[30] = "";
	char py_tmp2[30] = "";
	int uni;
	int ok = 0;
	while (*utf != '\0') {
		// One byte (ASCII): copy it and follow it with the '#' separator.
		if ((*utf >> 7) == 0) {
			HZ2PY_STR_COPY(out, utf, 1);
			*out = '#';
			out++;
		}
		// Two-byte sequence: copied through unchanged.
		else if ((*utf & 0xE0) == 0xC0) {
			HZ2PY_STR_COPY(out, utf, 2);
		}
		// Three-byte sequence: the only form that can hold a convertible character.
		else if ((*utf & 0xF0) == 0xE0) {
			if (*(utf + 1) != '\0' && *(utf + 2) != '\0') {
				// Assemble the code point from the payload bits of the three bytes.
				uni = (((int) (*utf & 0x0F)) << 12)
				      | (((int) (*(utf + 1) & 0x3F)) << 6)
				      | (*(utf + 2) & 0x3F);
				// U+4E00..U+9FA5: look the character up in the pinyin table.
				if (uni > 19967 && uni < 40870) {
					memset(py_tmp, '\0', 30);
					memset(py_tmp2, '\0', 30);
					strcpy(py_tmp, _pinyin_table_[uni - 19968]);
					_tmp = py_tmp;
					_tmp2 = py_tmp2;
					if (first_letter_only == 1) {
						// Keep the first letter, every separator, and the
						// letter that follows each separator.
						*_tmp2 = *_tmp;
						_tmp++;
						_tmp2++;
						while (*_tmp != '\0') {
							if (*_tmp == '|' || *(_tmp - 1) == '|') {
								*_tmp2 = *_tmp;
								_tmp2++;
							}
							_tmp++;
						}
					} else {
						strcpy(py_tmp2, py_tmp);
					}
					_tmp2 = py_tmp2;
					if (polyphone_support == 0) {
						// Cut the entry at the first separator so that only
						// the first reading remains.
						while (*_tmp2 != '\0') {
							if (*_tmp2 == '|') {
								*_tmp2 = '\0';
								break;
							}
							_tmp2++;
						}
						_tmp2 = py_tmp2;
					}
					strcpy(out, _tmp2);
					out += strlen(_tmp2);
					if (add_blank) {
						*out = '#'; // '#' is the separator between entries
						out++;
					}
					utf += 3;
				} else if (convert_double_char && uni > 65280 && uni < 65375) {
					// Full-width ASCII variant -> plain ASCII.
					*out = uni - 65248;
					out++;
					utf += 3;
				} else if (convert_double_char && uni == 12288) {
					// Ideographic space -> ASCII space.
					*out = 32;
					out++;
					utf += 3;
				} else {
					HZ2PY_STR_COPY(out, utf, 3);
				}
			} else {
				// Truncated sequence: the macro handles it and leaves the loop.
				HZ2PY_STR_COPY(out, utf, 3);
			}
		}
		// Four-byte sequence.
		else if ((*utf & 0xF8) == 0xF0) {
			HZ2PY_STR_COPY(out, utf, 4);
		}
		// Five-byte sequence.
		else if ((*utf & 0xFC) == 0xF8) {
			HZ2PY_STR_COPY(out, utf, 5);
		}
		// Six-byte sequence.
		else if ((*utf & 0xFE) == 0xFC) {
			HZ2PY_STR_COPY(out, utf, 6);
		} else {
			// Not a valid lead byte: hand it to overage_buff (or out) and stop.
			if (overage_buff != NULL) {
				*overage_buff = *utf;
				overage_buff++;
			} else {
				HZ2PY_STR_COPY(out, utf, 1);
			}
			break;
		}
	}
}
// Checks whether a NUL-terminated string looks like UTF-8.
// Only the first HZ2PY_UTF8_CHECK_LENGTH bytes are inspected; a multi-byte
// sequence that is cut off by the end of the inspected range is accepted.
// Returns 1 if no violation is found, 0 otherwise.
int is_utf8_string(char *utf) {
	int limit = strlen(utf);
	int pending = 0; // continuation bytes still expected for the current character
	int pos;

	if (limit > HZ2PY_UTF8_CHECK_LENGTH) {
		limit = HZ2PY_UTF8_CHECK_LENGTH;
	}

	for (pos = 0; pos < limit; pos++) {
		if (pending != 0) {
			// Inside a multi-byte character: the byte must be 10xxxxxx.
			if ((utf[pos] & 0xC0) != 0x80) {
				return 0;
			}
			pending--;
			continue;
		}

		// Start of a character: classify the lead byte.
		if ((utf[pos] >> 7) == 0) {
			continue;
		}
		if ((utf[pos] & 0xE0) == 0xC0) {
			pending = 1;
		} else if ((utf[pos] & 0xF0) == 0xE0) {
			pending = 2;
		} else if ((utf[pos] & 0xF8) == 0xF0) {
			pending = 3;
		} else if ((utf[pos] & 0xFC) == 0xF8) {
			pending = 4;
		} else if ((utf[pos] & 0xFE) == 0xFC) {
			pending = 5;
		} else {
			return 0;
		}
	}

	return 1;
}
// Converts the UTF-8 string read_buff to pinyin and writes the result to outbuf.
//
// read_buff  NUL-terminated UTF-8 input; must be shorter than
//            HZ2PY_INPUT_BUF_ARRAY_SIZE bytes.
// outbuf     Destination buffer. utf8_to_pinyin() neither bounds-checks nor
//            NUL-terminates it, so the caller must pass a zero-filled buffer
//            that is large enough.
//
// Returns 1 on success. Returns -1 if an argument is NULL, if the input does
// not fit into the internal buffer, or if it does not look like UTF-8; in
// those cases outbuf is left untouched.
//
// Fix: the input length is now checked before it is copied into the
// fixed-size stack buffer (the original strcpy was unbounded), and NULL
// arguments are rejected. The branch that prepended overage_buff was removed
// because overage_buff is a freshly zeroed local, so it could never execute.
int hztpy(const char *read_buff, char *outbuf) {
	char overage_buff[7] = { 0 };
	char inbuf[HZ2PY_INPUT_BUF_ARRAY_SIZE] = { 0 };
	// Fixed conversion options (see utf8_to_pinyin for their meaning):
	int add_blank = 1;           // append the '#' separator after each pinyin
	int polyphone_support = 1;   // emit all readings of a character
	int first_letter_only = 0;   // emit full pinyin, not just initials
	int convert_double_char = 0; // leave full-width characters unchanged

	if (read_buff == NULL || outbuf == NULL) {
		return -1;
	}
	// Leave room for the terminating NUL.
	if (strlen(read_buff) >= sizeof(inbuf)) {
		return -1;
	}
	strcpy(inbuf, read_buff);
	if (!is_utf8_string(inbuf)) {
		return -1;
	}
	// Incomplete trailing bytes go to overage_buff, which is discarded here.
	utf8_to_pinyin(inbuf, outbuf, first_letter_only, polyphone_support,
	               add_blank, convert_double_char, overage_buff);
	return 1;
}
// JNI entry point for com.tool.hz2py.Hz2py#hz2py(String).
//
// Converts `text` with hztpy() and returns the result as a new Java string.
// Returns NULL if `text` is NULL or its UTF characters cannot be obtained.
// If hztpy() rejects the input, the zero-filled buffer yields an empty string.
//
// Fixes:
//   - the array allocated with new[] is now released with delete[] (the
//     original used scalar delete);
//   - the output buffer is sized from the input instead of a fixed 512 bytes;
//   - a NULL `text` and a failed GetStringUTFChars() are handled.
JNIEXPORT jstring JNICALL Java_com_tool_hz2py_Hz2py_hz2py(JNIEnv *env,
        jobject thiz, jstring text) {
	if (text == NULL) {
		return NULL;
	}
	const char* pText = env->GetStringUTFChars(text, 0);
	if (pText == NULL) {
		return NULL; // GetStringUTFChars failed
	}
	// utf8_to_pinyin() emits at most 29 pinyin bytes plus one '#' for a 3-byte
	// character and 2 bytes for an ASCII byte, i.e. no more than 10 output
	// bytes per input byte. Keep the original 512 bytes as a lower bound.
	size_t capacity = strlen(pText) * 10 + 1;
	if (capacity < 512) {
		capacity = 512;
	}
	char* oText = new char[capacity];
	// The converter does not NUL-terminate its output, so pre-zero the buffer.
	memset(oText, 0, capacity);
	hztpy(pText, oText);
	jstring returnText = env->NewStringUTF(oText);
	env->ReleaseStringUTFChars(text, pText);
	delete[] oText;
	return returnText;
}

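头文件 hz2py.h 有点大,我直接上传给大家下载: hz2py 。另外附上直接编译好的 .so 共享库和 Java 文件: libHz2py 。注意:JNI 导出函数名 Java_com_tool_hz2py_Hz2py_hz2py 中包含了包名和类名,所以使用这个编译好的 .so 时,包名只能用 com.tool.hz2py,不能更改。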

本文链接:https://www.it72.com/37.htm

推荐阅读
最新回复 (2)
返回