
Usage and code examples for edu.stanford.nlp.process.WordToSentenceProcessor.process()

Reposted · Author: 知者 · Updated: 2024-03-24 00:01:05

This article collects Java code examples for the edu.stanford.nlp.process.WordToSentenceProcessor.process() method and shows how it is used in practice. The examples were extracted from selected projects on GitHub, Stack Overflow, Maven, and similar platforms, so they should serve as useful references. Details of WordToSentenceProcessor.process() are as follows:
Package: edu.stanford.nlp.process
Class: WordToSentenceProcessor
Method: process

About WordToSentenceProcessor.process

Returns a List of Lists where each element is built from a run of Words in the input Document. Specifically, reads through each word in the input document and breaks off a sentence after finding a valid sentence boundary token or end of file. Note that for this to work, the words in the input document must have been tokenized with a tokenizer that makes sentence boundary tokens their own tokens (e.g., PTBTokenizer).
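
As a quick illustration of this contract, here is a minimal, self-contained sketch (the input string and class name are made up for this example; it assumes CoreNLP is on the classpath) that tokenizes text with PTBTokenizer and lets process() group the tokens into sentences:

import java.io.StringReader;
import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.WordToSentenceProcessor;

public class ProcessDemo {
 public static void main(String[] args) {
  String text = "Dr. Smith went to Washington. He arrived on Tuesday.";
  // PTBTokenizer emits sentence-final punctuation as separate tokens,
  // which is exactly what WordToSentenceProcessor needs to find boundaries.
  List<CoreLabel> tokens = PTBTokenizer.coreLabelFactory()
    .getTokenizer(new StringReader(text)).tokenize();
  WordToSentenceProcessor<CoreLabel> wts = new WordToSentenceProcessor<>();
  List<List<CoreLabel>> sentences = wts.process(tokens);
  for (List<CoreLabel> sentence : sentences) {
   System.out.println(sentence); // one List<CoreLabel> per detected sentence
  }
 }
}

With the two-sentence input above, process() should return two token lists, split at the sentence-final period token.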

Code examples

Code example source: stanfordnlp/CoreNLP (the same snippet also appears in edu.stanford.nlp/stanford-parser, edu.stanford.nlp/stanford-corenlp, edu.stanford.nlp/corenlp, and com.guokr/stan-cn-com)

public <L, F> Document<L, F, List<IN>> processDocument(Document<L, F, IN> in) {
 // Copy the document's metadata into an empty document, then fill it
 // with the sentences (lists of tokens) produced by process().
 Document<L, F, List<IN>> doc = in.blankDocument();
 doc.addAll(process(in));
 return doc;
}

Code example source: stanfordnlp/CoreNLP (near-identical snippets also appear in edu.stanford.nlp/stanford-corenlp, edu.stanford.nlp/corenlp, and com.guokr/stan-cn-com)

wts = new WordToSentenceProcessor<>();
List<List<IN>> sentences = wts.process(document);
List<IN> newDocument = new ArrayList<>();
for (List<IN> sentence : sentences) {
 // ... (loop body truncated in the source snippet)
}

Code example source: stanfordnlp/CoreNLP (the same snippet also appears in edu.stanford.nlp/stanford-corenlp)

public static void addEnhancedSentences(Annotation doc) {
 // For every sentence that begins a paragraph: concatenate it with the
 // previous sentence and check whether the sentence splitter would turn the
 // pair into a single sentence. If so, add the merged version as an extra
 // sentence, for each sieve that potentially uses augmentedSentences in the
 // original.
 List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
 WordToSentenceProcessor wsp =
     new WordToSentenceProcessor(WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER); // a sentence splitter that never splits on newline
 int prevParagraph = 0;
 for (int i = 1; i < sentences.size(); i++) {
  CoreMap sentence = sentences.get(i);
  CoreMap prevSentence = sentences.get(i - 1);
  List<CoreLabel> tokensConcat = new ArrayList<>();
  tokensConcat.addAll(prevSentence.get(CoreAnnotations.TokensAnnotation.class));
  tokensConcat.addAll(sentence.get(CoreAnnotations.TokensAnnotation.class));
  List<List<CoreLabel>> sentenceTokens = wsp.process(tokensConcat);
  if (sentenceTokens.size() == 1) { // wsp merged them into a single sentence --> add enhanced sentence
   sentence.set(EnhancedSentenceAnnotation.class, constructSentence(sentenceTokens.get(0), prevSentence, sentence));
  }
 }
}

Code example source: stanfordnlp/CoreNLP (the same fragment also appears in edu.stanford.nlp/stanford-corenlp, edu.stanford.nlp/stanford-parser, edu.stanford.nlp/corenlp, and com.guokr/stan-cn-com)

List<List<IN>> sentences = wts.process(words);
String after = "";
IN last = null;

Code example source: stanfordnlp/CoreNLP

for (List<CoreLabel> sentenceTokens : wts.process(tokens)) {
 if (countLineNumbers) {
  ++lineNumber;
 }
 // ... (remainder of the loop truncated in the source snippet)
}

Code example source: stackoverflow.com

// Tokenize with PTBTokenizer (PTBLexer).
List<CoreLabel> tokens = PTBTokenizer.coreLabelFactory()
  .getTokenizer(new StringReader(text)).tokenize();
// Split into sentences with Stanford's sentence splitter (WordToSentenceProcessor).
WordToSentenceProcessor<CoreLabel> processor = new WordToSentenceProcessor<>();
List<List<CoreLabel>> splitSentences = processor.process(tokens);
for (List<CoreLabel> sentence : splitSentences) {
 for (CoreLabel token : sentence) {
  // Token text and character offsets are available via
  // token.value(), token.beginPosition(), and token.endPosition().
 }
}

Code example source: org.maochen.nlp/CoreNLP-NLP

public static List<String> segmenter(final String blob) {
  if (blob == null) {
    return null;
  }
  // Tokenize without PTB escaping or currency normalization, keeping the
  // token text close to the original string.
  TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer
      .factory(new CoreLabelTokenFactory(), "normalizeCurrency=false,ptb3Escaping=false");
  Tokenizer<CoreLabel> tokenizer = tokenizerFactory.getTokenizer(new StringReader(blob));
  List<CoreLabel> tokens = new ArrayList<>();
  while (tokenizer.hasNext()) {
    tokens.add(tokenizer.next());
  }
  // Group tokens into sentences, then recover each sentence's original
  // substring using the character offset of its last token.
  List<List<CoreLabel>> sentences = new WordToSentenceProcessor<CoreLabel>().process(tokens);
  int end;
  int start = 0;
  List<String> sentenceList = new ArrayList<>();
  for (List<CoreLabel> sentence : sentences) {
    end = sentence.get(sentence.size() - 1).endPosition();
    sentenceList.add(blob.substring(start, end).trim());
    start = end;
  }
  return sentenceList;
}

Code example source: edu.stanford.nlp/corenlp

for (List<CoreLabel> sentenceTokens : this.wts.process(tokens)) {
 if (sentenceTokens.size() == 0) {
  throw new RuntimeException("unexpected empty sentence: " + sentenceTokens);
 }
 // ... (remainder of the loop truncated in the source snippet)
}
