gpt4 book ai didi

Java 剪贴板 : Paste HTML from Firefox on Linux

转载 作者:IT王子 更新时间:2023-10-29 00:25:50 34 4
gpt4 key购买 nike

将 HTML 从 Firefox 粘贴到 Java 6 应用程序时,我遇到了一个奇怪的问题(仅在 Linux 上出现!)。这是一个最小的例子:

import java.awt.Toolkit;
import java.awt.datatransfer.Clipboard;
import java.awt.datatransfer.DataFlavor;
import java.awt.datatransfer.Transferable;
import java.io.Reader;
import java.nio.ByteBuffer;

/**
 * Prints the HTML currently held by the system clipboard: first a hex dump of
 * the raw bytes, then the bytes decoded as text.
 *
 * <p>The clipboard is queried with the {@code text/html} {@link ByteBuffer}
 * flavor declared as {@code charset=US-ASCII}. The returned bytes are decoded
 * as UTF-16LE after skipping the first {@link #PREFIX_LENGTH} bytes.
 *
 * <p>NOTE(review): US-ASCII is a 7-bit charset, so requesting the data in that
 * charset can already drop non-ASCII characters (e.g. umlauts) before the
 * UTF-16LE decoding step runs. The flavor is intentionally left unchanged here.
 */
class ClipboardPrinter {
    /** MIME string of the data flavor requested from the clipboard. */
    private static final String HTML_FLAVOR =
            "text/html;class=java.nio.ByteBuffer;charset=US-ASCII";

    /** Number of leading bytes dropped before decoding. */
    private static final int PREFIX_LENGTH = 2;

    /**
     * Reads the clipboard and prints its HTML content to standard output.
     *
     * @param args ignored
     * @throws Exception if the flavor string cannot be parsed or the clipboard
     *         data cannot be retrieved
     */
    public static void main(String[] args) throws Exception {
        Clipboard systemClipboard = Toolkit.getDefaultToolkit().getSystemClipboard();
        Transferable transferData = systemClipboard.getContents(null);
        if (transferData == null) {
            System.out.println("no content");
            return;
        }

        // Earlier attempt using the String flavor, kept for reference:
        // final DataFlavor htmlFlavorString = new DataFlavor("text/html;class=java.lang.String");
        // String html = (String)transferData.getTransferData(htmlFlavorString);
        // System.out.println("html = '" + html + "'");

        final DataFlavor htmlFlavor = new DataFlavor(HTML_FLAVOR);
        if (!transferData.isDataFlavorSupported(htmlFlavor)) {
            System.out.println("no text/html reader content");
            return;
        }

        ByteBuffer bb = (ByteBuffer) transferData.getTransferData(htmlFlavor);
        byte[] bytes = remainingBytes(bb);
        System.out.println(toHex(bytes));

        final String htmlContent = decodeUtf16Le(bytes, PREFIX_LENGTH);
        System.out.println("htmlContent = '" + htmlContent + "'");
    }

    /**
     * Copies the bytes between the buffer's position and limit.
     *
     * <p>Unlike {@link ByteBuffer#array()}, this honours position and limit and
     * also works for read-only and direct buffers. The caller's buffer is not
     * advanced because the copy is made through a duplicate.
     *
     * @param buffer the buffer to read from
     * @return a new array holding the remaining bytes
     */
    static byte[] remainingBytes(ByteBuffer buffer) {
        ByteBuffer view = buffer.duplicate();
        byte[] out = new byte[view.remaining()];
        view.get(out);
        return out;
    }

    /**
     * Formats each byte as two lower-case hex digits, without separators.
     *
     * @param bytes the bytes to format
     * @return the hex dump
     */
    static String toHex(byte[] bytes) {
        StringBuilder sb = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    }

    /**
     * Decodes {@code bytes} as UTF-16LE after skipping the first {@code skip} bytes.
     *
     * @param bytes the raw bytes
     * @param skip  number of leading bytes to ignore
     * @return the decoded text, or an empty string when there are no bytes
     *         beyond the skipped prefix
     */
    static String decodeUtf16Le(byte[] bytes, int skip) {
        // Guard against inputs shorter than the prefix; a plain subtraction
        // would yield a negative length.
        if (bytes.length <= skip) {
            return "";
        }
        return new String(bytes, skip, bytes.length - skip,
                java.nio.charset.Charset.forName("UTF-16LE"));
    }
}

首先我尝试使用 new DataFlavor("text/html;class=java.lang.String")(这段代码在上面的片段中被注释掉了),但得到的字符串无法使用:其开头有 2 个值为 65533 的字符(即使去掉这两个字符也无济于事)。

接下来我使用了带有 charset=US-ASCII 的 ByteBuffer 数据风格(我是故意使用 ASCII 的!):charset=UTF-16LE(或 UTF-16、UTF-16BE)完全不起作用。使用上面的 charset=US-ASCII 方案(以及 new String(bytes2, "UTF-16LE")),7 位字符可以正常显示(但例如变音符号不行,打印出来的是一个 '?')。

我截掉了开头的两个字节,因为开头似乎有两个 BOM(我不确定,也可能是别的东西)。

我用 charset=UTF-8 的数据风格并设置 cutoff=6 时得到了类似的结果(开头有两个三字节的“替换字符”0xEFBFBD,变音符号被编码成两个错误的字符)。在这种情况下我同样使用了 new String(bytes2, "UTF-16LE")。

对于以下问题,您有什么建议吗?

  • 在此解决方案中支持非 ASCII 字符(或找到更好的解决方案)?
  • 判断是UTF-16LE还是UTF-16BE?

谢谢!感谢任何提示!

顺便说一句:这是我的 (Linux) 系统支持的数据风格(DataFlavor,来自 transferable.getTransferDataFlavors()):

[java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.io.Reader]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.lang.String]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.nio.CharBuffer]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=[C]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.io.InputStream;charset=UTF-16]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.nio.ByteBuffer;charset=UTF-16]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=[B;charset=UTF-16]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.io.InputStream;charset=UTF-8]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.nio.ByteBuffer;charset=UTF-8]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=[B;charset=UTF-8]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.io.InputStream;charset=UTF-16BE]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.nio.ByteBuffer;charset=UTF-16BE]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=[B;charset=UTF-16BE]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.io.InputStream;charset=UTF-16LE]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.nio.ByteBuffer;charset=UTF-16LE]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=[B;charset=UTF-16LE]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.io.InputStream;charset=ISO-8859-1]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.nio.ByteBuffer;charset=ISO-8859-1]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=[B;charset=ISO-8859-1]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.io.InputStream;charset=US-ASCII]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=java.nio.ByteBuffer;charset=US-ASCII]
java.awt.datatransfer.DataFlavor[mimetype=text/html;representationclass=[B;charset=US-ASCII]
java.awt.datatransfer.DataFlavor[mimetype=application/x-java-serialized-object;representationclass=java.lang.String]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.io.Reader]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.lang.String]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.nio.CharBuffer]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=[C]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.io.InputStream;charset=unicode]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.nio.ByteBuffer;charset=UTF-16]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=[B;charset=UTF-16]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.io.InputStream;charset=UTF-8]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.nio.ByteBuffer;charset=UTF-8]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=[B;charset=UTF-8]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.io.InputStream;charset=UTF-16BE]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.nio.ByteBuffer;charset=UTF-16BE]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=[B;charset=UTF-16BE]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.io.InputStream;charset=UTF-16LE]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.nio.ByteBuffer;charset=UTF-16LE]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=[B;charset=UTF-16LE]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.io.InputStream;charset=ISO-8859-1]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.nio.ByteBuffer;charset=ISO-8859-1]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=[B;charset=ISO-8859-1]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.io.InputStream;charset=US-ASCII]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=java.nio.ByteBuffer;charset=US-ASCII]
java.awt.datatransfer.DataFlavor[mimetype=text/plain;representationclass=[B;charset=US-ASCII]
java.awt.datatransfer.DataFlavor[mimetype=text/x-moz-url-priv;representationclass=java.io.InputStream]
java.awt.datatransfer.DataFlavor[mimetype=text/_moz_htmlinfo;representationclass=java.io.InputStream]
java.awt.datatransfer.DataFlavor[mimetype=text/_moz_htmlcontext;representationclass=java.io.InputStream]
java.awt.datatransfer.DataFlavor[mimetype=text/x-moz-url-priv;representationclass=java.nio.ByteBuffer]
java.awt.datatransfer.DataFlavor[mimetype=text/_moz_htmlinfo;representationclass=java.nio.ByteBuffer]
java.awt.datatransfer.DataFlavor[mimetype=text/_moz_htmlcontext;representationclass=java.nio.ByteBuffer]
java.awt.datatransfer.DataFlavor[mimetype=text/x-moz-url-priv;representationclass=[B]
java.awt.datatransfer.DataFlavor[mimetype=text/_moz_htmlinfo;representationclass=[B]
java.awt.datatransfer.DataFlavor[mimetype=text/_moz_htmlcontext;representationclass=[B]]

最佳答案

我认为这个问题与他从剪贴板读取为 US-ASCII,然后转换为 unicode 并希望完整保留德语元音变音这一事实有关。由于 US-ASCII 是 7 位字符集,因此不包括德语变音符号,并且在将剪贴板读取为 US-ASCII 后已经丢失。

/**
 * Shows what happens to the German umlaut "o with diaeresis" (U+00F6) when it
 * is encoded in US-ASCII or ISO-8859-1 and the resulting bytes are then
 * decoded as UTF-8.
 */
public class CharsetDemo {
    /**
     * The umlaut under test, written as a Unicode escape so the program does
     * not depend on the encoding the source file is compiled with.
     */
    private static final String UMLAUT = "\u00f6";

    /**
     * Runs the demonstration and prints each step to standard output.
     *
     * @param args ignored
     * @throws Exception if one of the named charsets is not supported
     */
    public static void main(String[] args) throws Exception {
        // convert the German umlaut to bytes in US-ASCII charset
        byte[] bytes = UMLAUT.getBytes("US-ASCII");
        printSection("US-ASCII", bytes, new String(bytes, "US-ASCII"));

        // create a unicode string from the US-ASCII bytes
        String utf8String = new String(bytes, "UTF-8");
        printSection("UTF-8", utf8String.getBytes("UTF-8"), utf8String);

        // convert the German umlaut to bytes in ISO-8859-1 charset
        bytes = UMLAUT.getBytes("ISO-8859-1");
        printSection("ISO 8859-1", bytes, new String(bytes, "ISO-8859-1"));

        // create a unicode string from the ISO-8859-1 bytes
        utf8String = new String(bytes, "UTF-8");
        printSection("UTF-8", utf8String.getBytes("UTF-8"), utf8String);

        // bytes of the "REPLACEMENT CHARACTER"
        System.out.println("replacement character bytes: "
                + asHexString("\uFFFD".getBytes("UTF-8")));
    }

    /**
     * Prints one titled section: the title, the bytes as hex, the text, and a
     * trailing blank line.
     *
     * @param title heading of the section
     * @param bytes bytes to show as hex
     * @param text  string to show
     */
    private static void printSection(String title, byte[] bytes, String text) {
        System.out.println(title);
        System.out.println("bytes : " + asHexString(bytes));
        System.out.println("string: " + text);
        System.out.println();
    }

    /**
     * Formats bytes as upper-case hex, two digits per byte, each followed by a
     * single space.
     *
     * @param bytes the bytes to format
     * @return the hex representation, e.g. {@code "EF BF BD "}
     */
    static String asHexString(byte[] bytes) {
        StringBuilder sb = new StringBuilder(bytes.length * 3);
        for (byte b : bytes) {
            // %02X zero-pads so that values below 0x10 keep two digits.
            sb.append(String.format("%02X ", b));
        }
        return sb.toString();
    }
}

输出

US-ASCII
bytes : 3F
string: ? <--- the question mark represents here the "REPLACEMENT CHARACTER"

UTF-8
bytes : 3F
string: ?

ISO 8859-1
bytes : F6
string: ö

UTF-8
bytes : EF BF BD <-- the "REPLACEMENT CHARACTER", as "F6" is not a valid UTF-8 byte sequence
string: �

replacement character bytes: EF BF BD

关于Java 剪贴板 : Paste HTML from Firefox on Linux,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/14800053/

34 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com