gpt4 book ai didi

java - Java 中清理 HTML Id 属性的函数

转载 作者:太空宇宙 更新时间:2023-11-04 08:58:55 25 4
gpt4 key购买 nike

我编写了下面这个函数。但肯定有人能用更优雅的方式来完成这项任务。

/**
 * Sanitizes a string so that it can be used as an HTML 4 {@code id} attribute value.
 *
 * <p>HTML 4 Specification: ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
 * followed by any number of letters, digits ([0-9]), hyphens ("-"), underscores ("_"),
 * colons (":"), and periods (".").
 *
 * <p>Every character outside that set is dropped. Digits and punctuation that would end up
 * before the first letter are dropped as well, so a non-empty result always starts with a
 * letter.
 *
 * @param s the raw candidate id; may be {@code null}
 * @return the sanitized id; the empty string when {@code s} contains no ASCII letter;
 *     {@code null} when {@code s} is {@code null}
 */
public static String sanitizeHTMLIdAttribute(String s) {
    if (s == null) {
        return null;
    }
    StringBuilder sanitized = new StringBuilder(s.length());
    for (int i = 0; i < s.length(); i++) {
        char ch = s.charAt(i);
        // Range checks cover the whole ASCII alphabet, so no letter can be forgotten.
        boolean letter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
        if (letter) {
            sanitized.append(ch);
            continue;
        }
        if (sanitized.length() == 0) {
            // No letter has been kept yet: an id must start with a letter, so skip
            // leading digits and punctuation instead of stripping them afterwards.
            continue;
        }
        boolean digit = ch >= '0' && ch <= '9';
        boolean punctuation = ch == '-' || ch == '_' || ch == ':' || ch == '.';
        if (digit || punctuation) {
            sanitized.append(ch);
        }
    }
    return sanitized.toString();
}

最佳答案

我会做这样的事情:

/**
 * Reduces a string to a valid HTML 4 id.
 *
 * <p>HTML 4 Specification: ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
 * followed by any number of letters, digits ([0-9]), hyphens ("-"), underscores ("_"),
 * colons (":"), and periods (".").
 *
 * <p>Everything before the first ASCII letter is discarded; from that letter onwards only
 * characters allowed inside an id are kept.
 *
 * @param s the raw candidate id; may be {@code null}
 * @return {@code null} when {@code s} is {@code null}; {@code s} itself when no character had
 *     to be removed; otherwise a new string holding only the kept characters (possibly empty)
 */
public static String sanitizeHTMLIdAttribute(String s) {
    if (s == null) {
        return null;
    }
    final int length = s.length();

    // Find the position of the first ASCII letter (or the end of the string).
    int start;
    for (start = 0; start < length; start++) {
        if (isAZ(s.charAt(start))) {
            break;
        }
    }

    StringBuilder kept = new StringBuilder();
    for (int pos = start; pos < length; pos++) {
        char current = s.charAt(pos);
        if (isOkIdInnerChar(current)) {
            kept.append(current);
        }
    }

    // Same length means nothing was dropped, so return the input instance unchanged.
    if (kept.length() == length) {
        return s;
    }
    return kept.toString();
}

/**
 * Tells whether a character may appear inside an HTML 4 id.
 *
 * @param ch the character to test
 * @return {@code true} when {@code ch} is accepted by {@code isAZ}, {@code isNum} or
 *     {@code isSpecial}
 */
private static boolean isOkIdInnerChar(char ch) {
    if (isAZ(ch)) {
        return true;
    }
    if (isNum(ch)) {
        return true;
    }
    return isSpecial(ch);
}

/**
 * Tells whether a character is one of the four punctuation marks checked here:
 * hyphen, underscore, colon or period.
 *
 * @param ch the character to test
 * @return {@code true} for {@code '-'}, {@code '_'}, {@code ':'} and {@code '.'};
 *     {@code false} for every other character
 */
private static boolean isSpecial(char ch) {
    // Membership lookup in the fixed set of accepted punctuation characters.
    return "-_:.".indexOf(ch) >= 0;
}

/**
 * Tells whether a character is an ASCII letter.
 *
 * @param ch the character to test
 * @return {@code true} when {@code ch} lies in {@code 'a'..'z'} or {@code 'A'..'Z'}
 */
private static boolean isAZ(char ch) {
    if (ch >= 'a' && ch <= 'z') {
        return true;
    }
    return ch >= 'A' && ch <= 'Z';
}

/**
 * Tells whether a character is an ASCII decimal digit.
 *
 * @param ch the character to test
 * @return {@code true} when {@code ch} lies in {@code '0'..'9'}
 */
private static boolean isNum(char ch) {
    if (ch < '0') {
        return false;
    }
    return ch <= '9';
}

...不过,如果 s == null,我可能更愿意抛出一个 NullPointerException,如果 s 不包含合法字符,则抛出 IllegalArgumentException,但这当然是一个偏好问题。一些额外的功能:

  • 如果 s 本身就是一个有效的 ID,它会按原样返回,以节省空间(减少四处游离的字符串实例)和时间(String 构造很昂贵——是的,我知道分配很便宜,但是除了分配之外还有更多的事情发生)。
  • 我不使用 Character.isDigit,因为它对所有 Unicode 数字返回 true,包括“٣”之类的内容
  • 我不使用 Character.isLetter,因为它对所有 Unicode 字母返回 true,包括“å”之类的内容

关于java - Java 中清理 HTML Id 属性的函数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/1404183/

25 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com