gpt4 book ai didi

org.archive.io.warc.WARCReader.iterator()方法的使用及代码示例

转载 作者:知者 更新时间:2024-03-27 14:27:05 26 4
gpt4 key购买 nike

本文整理了Java中org.archive.io.warc.WARCReader.iterator()方法的一些代码示例,展示了WARCReader.iterator()的具体用法。这些代码示例主要来源于GitHub/Stack Overflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。WARCReader.iterator()方法的具体详情如下:
包路径:org.archive.io.warc.WARCReader
类名称:WARCReader
方法名:iterator

WARCReader.iterator介绍

暂无

代码示例

代码示例来源:origin: internetarchive/heritrix3

try {
  l.setLevel(Level.WARNING);
  for (final Iterator<ArchiveRecord> i = reader.iterator(); i.hasNext();) {
    WARCRecord r = (WARCRecord)i.next();
    if (!isARCType(r.getHeader().getMimetype())) {

代码示例来源:origin: ViDA-NYU/ache

/**
 * Creates an iterator over the WARC records found in the files of a directory.
 *
 * <p>Opens a directory listing and, when it contains at least one entry, opens
 * that first entry as a WARC file and obtains its record iterator. When the
 * directory has no entries, the reader and record iterator are left unset.
 *
 * @param directory the folder whose files are to be read
 * @throws IllegalArgumentException if listing the directory or opening the
 *         first file fails with an {@link IOException}; the original exception
 *         is preserved as the cause
 */
public WarcRecordsIterator(Path directory) {
  try {
    filesStream = Files.newDirectoryStream(directory);
    filesIt = filesStream.iterator();
    if (filesIt.hasNext()) {
      Path file = filesIt.next();
      warcReader = openFile(file);
      warcRecordIterator = warcReader.iterator();
    }
  } catch (IOException e) {
    IllegalArgumentException failure =
        new IllegalArgumentException("Failed to open target repository folder: " + directory, e);
    // The constructor is about to fail, so no caller will ever get a chance to
    // close the directory stream; release it here to avoid leaking the handle.
    if (filesStream != null) {
      try {
        filesStream.close();
      } catch (IOException closeFailure) {
        failure.addSuppressed(closeFailure);
      }
    }
    throw failure;
  }
}

代码示例来源:origin: org.netpreserve.commons/commons-web

/**
 * Writes every record of this archive to standard output.
 *
 * <p>For each record returned by {@code iterator()}, prints the header's string
 * form, asks the record to dump itself, and then prints an empty line.
 *
 * @param compress not consulted by this implementation
 */
@Override
public void dump(boolean compress)
    throws IOException, java.text.ParseException {
  final Iterator<ArchiveRecord> records = iterator();
  while (records.hasNext()) {
    final ArchiveRecord record = records.next();
    final String headerText = record.getHeader().toString();
    System.out.println(headerText);
    record.dump();
    System.out.println();
  }
}

代码示例来源:origin: org.netpreserve.commons/webarchive-commons

/**
 * Writes every record of this archive to standard output.
 *
 * <p>For each record returned by {@code iterator()}, prints the header's string
 * form, asks the record to dump itself, and then prints an empty line.
 *
 * @param compress not consulted by this implementation
 */
@Override
public void dump(boolean compress)
    throws IOException, java.text.ParseException {
  final Iterator<ArchiveRecord> records = iterator();
  while (records.hasNext()) {
    final ArchiveRecord record = records.next();
    final String headerText = record.getHeader().toString();
    System.out.println(headerText);
    record.dump();
    System.out.println();
  }
}

代码示例来源:origin: iipc/webarchive-commons

/**
 * Writes every record of this archive to standard output.
 *
 * <p>For each record returned by {@code iterator()}, prints the header's string
 * form, asks the record to dump itself, and then prints an empty line.
 *
 * @param compress not consulted by this implementation
 */
@Override
public void dump(boolean compress)
    throws IOException, java.text.ParseException {
  final Iterator<ArchiveRecord> records = iterator();
  while (records.hasNext()) {
    final ArchiveRecord record = records.next();
    final String headerText = record.getHeader().toString();
    System.out.println(headerText);
    record.dump();
    System.out.println();
  }
}

代码示例来源:origin: lintool/warcbase

/**
 * Prepares this reader for the given input split.
 *
 * <p>Records the split's start and end offsets, opens the split's file through
 * its file system, wraps the stream in a WARC reader and obtains the reader's
 * record iterator. The current position is set to the split's start.
 *
 * @param genericSplit the split to read; cast to {@code FileSplit}
 * @param context task context supplying the job configuration
 * @throws IOException if the file cannot be opened or the WARC reader cannot
 *         be created; the underlying input stream is closed before rethrowing
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
 FileSplit split = (FileSplit) genericSplit;
 Configuration job = context.getConfiguration();
 start = split.getStart();
 end = start + split.getLength();
 final Path file = split.getPath();
 FileSystem fs = file.getFileSystem(job);
 FSDataInputStream fileIn = fs.open(file);
 try {
  reader = (WARCReader) WARCReaderFactory.get(file.toString(),
    new BufferedInputStream(fileIn), true);
  iter = reader.iterator();
 } catch (IOException | RuntimeException e) {
  // Nothing else holds the raw stream yet, so it would leak if reader
  // construction fails; close it and keep any close failure attached.
  try {
   fileIn.close();
  } catch (IOException closeFailure) {
   e.addSuppressed(closeFailure);
  }
  throw e;
 }
 this.pos = start;
}

代码示例来源:origin: ViDA-NYU/ache

filePath = filesIt.next();
  warcReader = openFile(filePath);
  warcRecordIterator = warcReader.iterator();
  nextRecord = (WARCRecord) warcReader.get();
} catch (IOException e) {

代码示例来源:origin: lintool/warcbase

for (Iterator<ArchiveRecord> ii = reader.iterator(); ii.hasNext();) {
  WARCRecord r = (WARCRecord) ii.next();
ArchiveRecordHeader h = r.getHeader();

代码示例来源:origin: iipc/openwayback

/**
 * Builds a closeable iterator of capture search results from a WARC reader.
 *
 * <p>The reader's record iterator is wrapped in an
 * {@code ArchiveReaderCloseableIterator}, adapted to {@code WARCRecord}s, and
 * then adapted to {@code CaptureSearchResult}s by an adapter configured with
 * this object's {@code canonicalizer} and {@code processAll} settings.
 *
 * @param reader the WARC reader whose records are to be converted
 * @return iterator of search results derived from the reader's records
 * @throws IOException declared by the signature
 */
public CloseableIterator<CaptureSearchResult> iterator(WARCReader reader)
    throws IOException {
  final Adapter<ArchiveRecord, WARCRecord> toWarcRecord =
      new ArchiveRecordToWARCRecordAdapter();

  final WARCRecordToSearchResultAdapter toSearchResult =
      new WARCRecordToSearchResultAdapter();
  toSearchResult.setCanonicalizer(canonicalizer);
  toSearchResult.setProcessAll(processAll);

  final ArchiveReaderCloseableIterator archiveRecords =
      new ArchiveReaderCloseableIterator(reader, reader.iterator());
  final CloseableIterator<WARCRecord> warcRecords =
      new AdaptedIterator<ArchiveRecord, WARCRecord>(archiveRecords, toWarcRecord);

  return new AdaptedIterator<WARCRecord, CaptureSearchResult>(warcRecords, toSearchResult);
}

代码示例来源:origin: org.archive.heritrix/heritrix-commons

try {
  l.setLevel(Level.WARNING);
  for (final Iterator<ArchiveRecord> i = reader.iterator(); i.hasNext();) {
    WARCRecord r = (WARCRecord)i.next();
    if (!isARCType(r.getHeader().getMimetype())) {

代码示例来源:origin: org.netpreserve.openwayback/openwayback-core

/**
 * Builds a closeable iterator of capture search results from a WARC reader.
 *
 * <p>The reader's record iterator is wrapped in an
 * {@code ArchiveReaderCloseableIterator}, adapted to {@code WARCRecord}s, and
 * then adapted to {@code CaptureSearchResult}s by an adapter configured with
 * this object's {@code canonicalizer} and {@code processAll} settings.
 *
 * @param reader the WARC reader whose records are to be converted
 * @return iterator of search results derived from the reader's records
 * @throws IOException declared by the signature
 */
public CloseableIterator<CaptureSearchResult> iterator(WARCReader reader)
    throws IOException {
  final Adapter<ArchiveRecord, WARCRecord> toWarcRecord =
      new ArchiveRecordToWARCRecordAdapter();

  final WARCRecordToSearchResultAdapter toSearchResult =
      new WARCRecordToSearchResultAdapter();
  toSearchResult.setCanonicalizer(canonicalizer);
  toSearchResult.setProcessAll(processAll);

  final ArchiveReaderCloseableIterator archiveRecords =
      new ArchiveReaderCloseableIterator(reader, reader.iterator());
  final CloseableIterator<WARCRecord> warcRecords =
      new AdaptedIterator<ArchiveRecord, WARCRecord>(archiveRecords, toWarcRecord);

  return new AdaptedIterator<WARCRecord, CaptureSearchResult>(warcRecords, toSearchResult);
}

代码示例来源:origin: ViDA-NYU/ache

boolean readSecond = false;
for (final Iterator<ArchiveRecord> i = reader.iterator(); i.hasNext();) {
  WARCRecord ar = (WARCRecord) i.next();
  if (!readWarcInfoRecord) {

26 4 0
Copyright 2021 - 2024 cfsdn All Rights Reserved 蜀ICP备2022000587号
广告合作:1813099741@qq.com 6ren.com