gpt4 book ai didi

java - java中如何获取词干后的符号

转载 作者:行者123 更新时间:2023-12-01 14:37:36 26 4
gpt4 key购买 nike

我有一个问题,在stem之后我无法得到像($,...)这样的符号,因为在输出中我需要示例字符串单词的价格= 44,66$;//词干提取后//输出 = 4466字符串的词干非常好,但我想要不改变价格的数字(价格),因为我需要输出后的价格。最后我的问题是如何获得像(44.33$)这样的价格数字,并在这个主干代码之后带有符号?

//////这里所有代码“”

class NewStrings {
public String str;

NewString() {
str = "";
}
}

public class Stemmer {

private String Clean( String str ) {
int last = str.length();

Character ch = new Character( str.charAt(0) );
String temp = "";

for ( int i=0; i < last; i++ ) {
if ( ch.isLetterOrDigit( str.charAt(i) ) )
temp += str.charAt(i);
}

return temp;
} //clean

private boolean hasSuffix( String word, String suffix, NewString stem ) {

String tmp = "";

if ( word.length() <= suffix.length() )
return false;
if (suffix.length() > 1)
if ( word.charAt( word.length()-2 ) != suffix.charAt( suffix.length()-2 ) )
return false;

stem.str = "";

for ( int i=0; i<word.length()-suffix.length(); i++ )
stem.str += word.charAt( i );
tmp = stem.str;

for ( int i=0; i<suffix.length(); i++ )
tmp += suffix.charAt( i );

if ( tmp.compareTo( word ) == 0 )
return true;
else
return false;
}

private boolean vowel( char ch, char prev ) {
switch ( ch ) {
case 'a': case 'e': case 'i': case 'o': case 'u':
return true;
case 'y': {

switch ( prev ) {
case 'a': case 'e': case 'i': case 'o': case 'u':
return false;

default:
return true;
}
}

default :
return false;
}
}

private int measure( String stem ) {

int i=0, count = 0;
int length = stem.length();

while ( i < length ) {
for ( ; i < length ; i++ ) {
if ( i > 0 ) {
if ( vowel(stem.charAt(i),stem.charAt(i-1)) )
break;
}
else {
if ( vowel(stem.charAt(i),'a') )
break;
}
}

for ( i++ ; i < length ; i++ ) {
if ( i > 0 ) {
if ( !vowel(stem.charAt(i),stem.charAt(i-1)) )
break;
}
else {
if ( !vowel(stem.charAt(i),'?') )
break;
}
}
if ( i < length ) {
count++;
i++;
}
} //while

return(count);
}

private boolean containsVowel( String word ) {

for (int i=0 ; i < word.length(); i++ )
if ( i > 0 ) {
if ( vowel(word.charAt(i),word.charAt(i-1)) )
return true;
}
else {
if ( vowel(word.charAt(0),'a') )
return true;
}

return false;
}

private boolean cvc( String str ) {
int length=str.length();

if ( length < 3 )
return false;

if ( (!vowel(str.charAt(length-1),str.charAt(length-2)) )
&& (str.charAt(length-1) != 'w') && (str.charAt(length-1) != 'x') && (str.charAt(length-1) != 'y')
&& (vowel(str.charAt(length-2),str.charAt(length-3))) ) {

if (length == 3) {
if (!vowel(str.charAt(0),'?'))
return true;
else
return false;
}
else {
if (!vowel(str.charAt(length-3),str.charAt(length-4)) )
return true;
else
return false;
}
}

return false;
}

private String step1( String str ) {

NewString stem = new NewString();

if ( str.charAt( str.length()-1 ) == 's' ) {
if ( (hasSuffix( str, "sses", stem )) || (hasSuffix( str, "ies", stem)) ){
String tmp = "";
for (int i=0; i<str.length()-2; i++)
tmp += str.charAt(i);
str = tmp;
}
else {
if ( ( str.length() == 1 ) && ( str.charAt(str.length()-1) == 's' ) ) {
str = "";
return str;
}
if ( str.charAt( str.length()-2 ) != 's' ) {
String tmp = "";
for (int i=0; i<str.length()-1; i++)
tmp += str.charAt(i);
str = tmp;
}
}
}

if ( hasSuffix( str,"eed",stem ) ) {
if ( measure( stem.str ) > 0 ) {
String tmp = "";
for (int i=0; i<str.length()-1; i++)
tmp += str.charAt( i );
str = tmp;
}
}
else {
if ( (hasSuffix( str,"ed",stem )) || (hasSuffix( str,"ing",stem )) ) {
if (containsVowel( stem.str )) {

String tmp = "";
for ( int i = 0; i < stem.str.length(); i++)
tmp += str.charAt( i );
str = tmp;
if ( str.length() == 1 )
return str;

if ( ( hasSuffix( str,"at",stem) ) || ( hasSuffix( str,"bl",stem ) ) || ( hasSuffix( str,"iz",stem) ) ) {
str += "e";

}
else {
int length = str.length();
if ( (str.charAt(length-1) == str.charAt(length-2))
&& (str.charAt(length-1) != 'l') && (str.charAt(length-1) != 's') && (str.charAt(length-1) != 'z') ) {

tmp = "";
for (int i=0; i<str.length()-1; i++)
tmp += str.charAt(i);
str = tmp;
}
else
if ( measure( str ) == 1 ) {
if ( cvc(str) )
str += "e";
}
}
}
}
}

if ( hasSuffix(str,"y",stem) )
if ( containsVowel( stem.str ) ) {
String tmp = "";
for (int i=0; i<str.length()-1; i++ )
tmp += str.charAt(i);
str = tmp + "i";
}
return str;
}

private String step2( String str ) {

String[][] suffixes = { { "ational", "ate" },
{ "tional", "tion" },
{ "enci", "ence" },
{ "anci", "ance" },
{ "izer", "ize" },
{ "iser", "ize" },
{ "abli", "able" },
{ "alli", "al" },
{ "entli", "ent" },
{ "eli", "e" },
{ "ousli", "ous" },
{ "ization", "ize" },
{ "isation", "ize" },
{ "ation", "ate" },
{ "ator", "ate" },
{ "alism", "al" },
{ "iveness", "ive" },
{ "fulness", "ful" },
{ "ousness", "ous" },
{ "aliti", "al" },
{ "iviti", "ive" },
{ "biliti", "ble" }};
NewString stem = new NewString();


for ( int index = 0 ; index < suffixes.length; index++ ) {
if ( hasSuffix ( str, suffixes[index][0], stem ) ) {
if ( measure ( stem.str ) > 0 ) {
str = stem.str + suffixes[index][1];
return str;
}
}
}

return str;
}

private String step3( String str ) {

String[][] suffixes = { { "icate", "ic" },
{ "ative", "" },
{ "alize", "al" },
{ "alise", "al" },
{ "iciti", "ic" },
{ "ical", "ic" },
{ "ful", "" },
{ "ness", "" }};
NewString stem = new NewString();

for ( int index = 0 ; index<suffixes.length; index++ ) {
if ( hasSuffix ( str, suffixes[index][0], stem ))
if ( measure ( stem.str ) > 0 ) {
str = stem.str + suffixes[index][1];
return str;
}
}
return str;
}

private String step4( String str ) {

String[] suffixes = { "al", "ance", "ence", "er", "ic", "able", "ible", "ant", "ement", "ment", "ent", "sion", "tion",
"ou", "ism", "ate", "iti", "ous", "ive", "ize", "ise"};

NewString stem = new NewString();

for ( int index = 0 ; index<suffixes.length; index++ ) {
if ( hasSuffix ( str, suffixes[index], stem ) ) {

if ( measure ( stem.str ) > 1 ) {
str = stem.str;
return str;
}
}
}
return str;
}

private String step5( String str ) {

if ( str.charAt(str.length()-1) == 'e' ) {
if ( measure(str) > 1 ) {/* measure(str)==measure(stem) if ends in vowel */
String tmp = "";
for ( int i=0; i<str.length()-1; i++ )
tmp += str.charAt( i );
str = tmp;
}
else
if ( measure(str) == 1 ) {
String stem = "";
for ( int i=0; i<str.length()-1; i++ )
stem += str.charAt( i );

if ( !cvc(stem) )
str = stem;
}
}

if ( str.length() == 1 )
return str;
if ( (str.charAt(str.length()-1) == 'l') && (str.charAt(str.length()-2) == 'l') && (measure(str) > 1) )
if ( measure(str) > 1 ) {/* measure(str)==measure(stem) if ends in vowel */
String tmp = "";
for ( int i=0; i<str.length()-1; i++ )
tmp += str.charAt( i );
str = tmp;
}
return str;
}

//这是您的添加代码

  /*** chrs besides these will be removed */

static final String allowedChrs = "$€¥£0123456789,.";
public static String step6(String str){

int sizeOfString = str.length();
StringBuilder tmp = new StringBuilder();

for(int i=0;i<sizeOfString;++i){

if(allowedChrs.indexOf(str.charAt(i)) > -1){
tmp.append(str.charAt(i));
}
}
return tmp.toString();
}

private String stripPrefixes ( String str) {

String[] prefixes = { "kilo", "micro", "milli", "intra", "ultra", "mega", "nano", "pico", "pseudo"};

int last = prefixes.length;
for ( int i=0 ; i<last; i++ ) {
if ( str.startsWith( prefixes[i] ) ) {
String temp = "";
for ( int j=0 ; j< str.length()-prefixes[i].length(); j++ )
temp += str.charAt( j+prefixes[i].length() );
return temp;
}
}

return str;
}


private String stripSuffixes( String str ) {

str = step1( str );
if ( str.length() >= 1 )
str = step2( str );
if ( str.length() >= 1 )
str = step3( str );
if ( str.length() >= 1 )
str = step4( str );
if ( str.length() >= 1 )
str = step5( str );
if ( str.length() >= 1 )
str = step6( str );
return str;
}

public static void main(String[] args) {
String Word = "3.4$";
// String str = stripAffixes (Word);
porrrr fun = new porrrr();
fun.stripAffixes(Word);

System.out.println(fun.stripAffixes(Word));

} //stripAffixes
public String stripAffixes( String str ) {

str = str.toLowerCase();
str = Clean(str);

if (( str != "" ) && (str.length() > 2)) {
str = stripPrefixes(str);

if (str != "" )
str = stripSuffixes(str);

}

return str;
} //stripAffixes

} //class

最佳答案

很难理解你到底要做什么,但是,你见过你的 Clean 方法吗?

 if ( ch.isLetterOrDigit( str.charAt(i) ) ){ // you only take letters and digits from the initial String.

$ 和逗号既不是数字也不是字母

编辑它应该看起来像这样:

    static final List<Character> list = Arrays.asList('$','€','¥','£');

private String step6(String str){
int sizeOfString = str.length();
StringBuilder tmp = new StringBuilder();
for(int i=0;i<sizeOfString;++i){
if(list.contains(str.charAt(i))){
tmp.append(str.charAt(i));
}

}
return tmp.toString();
}

关于java - java中如何获取词干后的符号,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/16333952/

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com