gpt4 book ai didi

org.archive.io.warc.WARCReader.get()方法的使用及代码示例

转载 作者:知者 更新时间:2024-03-27 14:35:05 26 4
gpt4 key购买 nike

本文整理了Java中org.archive.io.warc.WARCReader.get()方法的一些代码示例,展示了WARCReader.get()的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。WARCReader.get()方法的具体详情如下:
包路径:org.archive.io.warc.WARCReader
类名称:WARCReader
方法名:get

WARCReader.get介绍

暂无

代码示例

代码示例来源:origin: lintool/warcbase

/**
 * Parses a single {@code WARCRecord} out of an in-memory byte array.
 *
 * @param bytes raw bytes to read the record from
 * @return the record obtained from a {@code WARCReader} opened over {@code bytes}
 * @throws IOException if creating the reader or fetching the record fails with an I/O error
 */
public static WARCRecord fromBytes(byte[] bytes) throws IOException {
 final ByteArrayInputStream rawInput = new ByteArrayInputStream(bytes);
 final BufferedInputStream bufferedInput = new BufferedInputStream(rawInput);
 // The reader is intentionally left open: the returned record is handed to the caller
 // while it is still backed by this stream.
 final WARCReader warcReader = (WARCReader) WARCReaderFactory.get("", bufferedInput, false);
 return (WARCRecord) warcReader.get();
}

代码示例来源:origin: lintool/warcbase

/**
 * Fetches the archived record for a capture over HTTP and wraps it as a {@code Resource}.
 *
 * <p>The URL is built from {@code host}, {@code port}, {@code table}, the capture's 14-digit
 * date and its original URL. The first four bytes of the response are inspected to choose
 * between the WARC and ARC readers, then pushed back so the chosen reader sees the full stream.
 *
 * @param result the capture to retrieve
 * @return the resource built from the first record of the response; never {@code null}
 * @throws ResourceNotAvailableException if an I/O error occurs while reading the URL, or if
 *         no resource could be produced
 */
@Override
public Resource retrieveResource(CaptureSearchResult result) throws ResourceNotAvailableException {
 Resource r = null;
 String resourceUrl = "http://" + host + ":" + port + "/" + table + "/"
   + ArchiveUtils.get14DigitDate(result.getCaptureDate()) + "/" + result.getOriginalUrl();
 LOGGER.info("Fetching resource url: " + resourceUrl);
 PushbackInputStream pb = null;
 try {
  // Read first 4 bytes of input stream to detect archive format; push back into stream for re-use
  pb = new PushbackInputStream(new URL(resourceUrl).openStream(), 4);
  byte[] signature = new byte[4];
  // A single read() may legally return fewer bytes than requested, so keep reading until the
  // signature is full or the stream ends.
  int filled = 0;
  while (filled < signature.length) {
   int n = pb.read(signature, filled, signature.length - filled);
   if (n < 0) {
    break;
   }
   filled += n;
  }
  // Push back only what was actually read; unreading the whole array after a short read
  // would inject bytes that were never in the stream.
  if (filled > 0) {
   pb.unread(signature, 0, filled);
  }
  // Compare raw bytes rather than decoding with the platform default charset.
  boolean isWarc = filled == signature.length
    && signature[0] == 'W' && signature[1] == 'A'
    && signature[2] == 'R' && signature[3] == 'C';
  if (isWarc) {
   WARCReader reader = (WARCReader) WARCReaderFactory.get(resourceUrl, pb, false);
   r = ResourceFactory.WARCArchiveRecordToResource(reader.get(), reader);
  } else {
   // Assume ARC format if not WARC
   ARCReader reader = (ARCReader) ARCReaderFactory.get(resourceUrl, pb, false);
   r = ResourceFactory.ARCArchiveRecordToResource(reader.get(), reader);
  }
 } catch (IOException e) {
  // No resource was produced, so nothing else owns the HTTP stream: release it here.
  if (pb != null) {
   try {
    pb.close();
   } catch (IOException ignored) {
    // Best-effort cleanup; the original failure is what gets reported below.
   }
  }
  throw new ResourceNotAvailableException("Error reading " + resourceUrl);
 }
 if (r == null) {
  throw new ResourceNotAvailableException("Unable to find: " + result.toString());
 }
 return r;
}

代码示例来源:origin: iipc/openwayback

/**
 * Opens an archive reader for {@code url} at {@code offset} and converts the record it
 * yields into a {@code Resource}.
 *
 * <p>On success, the elapsed wall-clock time (measured from before the reader is opened) is
 * reported to {@code PerformanceLogger} under the label {@code "Http11Resource"}.
 *
 * @param url    location of the archive file
 * @param offset offset passed to the reader factory
 * @return the resource built from the record returned by the reader
 * @throws IOException                   propagated from opening the reader or reading the record
 * @throws ResourceNotAvailableException if the reader is neither an ARC nor a WARC reader
 */
public static Resource getResource(URL url, long offset)
throws IOException, ResourceNotAvailableException {

  final long startedAt = System.currentTimeMillis();
  final ArchiveReader archive = defaultTimeoutReader.getArchiveReader(url, offset);

  final Resource resource;
  if (archive instanceof ARCReader) {
    final ARCReader arc = (ARCReader) archive;
    resource = ARCArchiveRecordToResource(arc.get(), arc);
  } else if (archive instanceof WARCReader) {
    final WARCReader warc = (WARCReader) archive;
    resource = WARCArchiveRecordToResource(warc.get(), warc);
  } else {
    throw new ResourceNotAvailableException("Unknown ArchiveReader");
  }

  // Timing is only recorded when a resource was actually produced.
  PerformanceLogger.noteElapsed("Http11Resource",
      System.currentTimeMillis() - startedAt, url.toExternalForm());
  return resource;
}

代码示例来源:origin: org.netpreserve.openwayback/openwayback-core

/**
 * Opens an archive reader for {@code url} at {@code offset} and converts the record it
 * yields into a {@code Resource}.
 *
 * <p>On success, the elapsed wall-clock time (measured from before the reader is opened) is
 * reported to {@code PerformanceLogger} under the label {@code "Http11Resource"}.
 *
 * @param url    location of the archive file
 * @param offset offset passed to the reader factory
 * @return the resource built from the record returned by the reader
 * @throws IOException                   propagated from opening the reader or reading the record
 * @throws ResourceNotAvailableException if the reader is neither an ARC nor a WARC reader
 */
public static Resource getResource(URL url, long offset)
throws IOException, ResourceNotAvailableException {

  final long startedAt = System.currentTimeMillis();
  final ArchiveReader archive = defaultTimeoutReader.getArchiveReader(url, offset);

  final Resource resource;
  if (archive instanceof ARCReader) {
    final ARCReader arc = (ARCReader) archive;
    resource = ARCArchiveRecordToResource(arc.get(), arc);
  } else if (archive instanceof WARCReader) {
    final WARCReader warc = (WARCReader) archive;
    resource = WARCArchiveRecordToResource(warc.get(), warc);
  } else {
    throw new ResourceNotAvailableException("Unknown ArchiveReader");
  }

  // Timing is only recorded when a resource was actually produced.
  PerformanceLogger.noteElapsed("Http11Resource",
      System.currentTimeMillis() - startedAt, url.toExternalForm());
  return resource;
}

代码示例来源:origin: ViDA-NYU/ache

warcReader = openFile(filePath);
  warcRecordIterator = warcReader.iterator();
  nextRecord = (WARCRecord) warcReader.get();
} catch (IOException e) {
  String f = filePath == null ? null : filePath.toString();

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com