当前位置:网站首页 > Java基础 > 正文

nlp教程java



package com.nunu.ai.nlp.controller;

import edu.stanford.nlp.coref.CorefCoreAnnotations; import edu.stanford.nlp.coref.data.CorefChain; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.StanfordCoreNLP; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations; import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.TreeCoreAnnotations; import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.PropertiesUtils; import edu.stanford.nlp.util.StringUtils; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController;

import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Properties;

/

  • created with IDEA *
  • @author:huqm
  • @date:2020/12/7
  • @time:16:20 <p> *
  • </p> */ @RestController @RequestMapping(”/test”) public class TestController {

    @RequestMapping(“getStr”) public List<String> getStr(String text){

    </span><span style="color: rgba(0, 0, 255, 1)">return</span><span style="color: rgba(0, 0, 0, 1)"> segInCh(text); 

    } public List&lt;String&gt; segInCh(String text) {

    </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">载入properties 文件 

    // StanfordCoreNLP pipline = new StanfordCoreNLP(“StanfordCoreNLP-chinese.properties”);

    </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">1.2 自定义功能 (1) </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">StanfordCoreNLP的各个组件(annotator)按“tokenize(分词), ssplit(断句), pos(词性标注), lemma(词元化), </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)"> ner(命名实体识别), parse(语法分析), dcoref(同义词分辨)”顺序进行处理。 

    // Properties properties = new Properties(); // properties.setProperty(“annotators”, “tokenize, ssplit, pos, lemma, ner, parse, dcoref”); // StanfordCoreNLP pipline = new StanfordCoreNLP(properties);

    </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">自定义功能(2) 自己在项目中建一个properties 文件,然后在文件中设置模型属性,可以参考1中的配置文件 

    // String[] args = new String[]{“-props”, “properies/CoreNLP-Seg-CH.properties”}; // Properties properties = StringUtils.argsToProperties(args); // StanfordCoreNLP pipline = new StanfordCoreNLP(properties);

    </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">自定义功能(3)</span> StanfordCoreNLP pipline = <span style="color: rgba(0, 0, 255, 1)">new</span><span style="color: rgba(0, 0, 0, 1)"> StanfordCoreNLP(PropertiesUtils.asProperties( </span>"annotators", "tokenize,ssplit,pos,lemma,ner,parse,dcoref"<span style="color: rgba(0, 0, 0, 1)">, </span>"ssplit.isOneSentence", "true"<span style="color: rgba(0, 0, 0, 1)">, </span>"tokenize.language", "zh"<span style="color: rgba(0, 0, 0, 1)">, </span>"segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz"<span style="color: rgba(0, 0, 0, 1)">, </span>"segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese"<span style="color: rgba(0, 0, 0, 1)">, </span>"segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz"<span style="color: rgba(0, 0, 0, 1)">, </span>"segment.sighanPostProcessing", "true"<span style="color: rgba(0, 0, 0, 1)"> )); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">创建一个解析器,传入的是需要解析的文本</span> Annotation annotation = <span style="color: rgba(0, 0, 255, 1)">new</span><span style="color: rgba(0, 0, 0, 1)"> Annotation(text); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">解析</span> 

    pipline.annotate(annotation);

    </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">根据标点符号,进行句子的切分,每一个句子被转化为一个CoreMap的数据结构,保存了句子的信息()</span> List&lt;CoreMap&gt; sentences = annotation.get(CoreAnnotations.SentencesAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">从CoreMap 中取出CoreLabel List ,打印</span> ArrayList&lt;String&gt; list = <span style="color: rgba(0, 0, 255, 1)">new</span> ArrayList&lt;&gt;<span style="color: rgba(0, 0, 0, 1)">(); </span><span style="color: rgba(0, 0, 255, 1)">for</span><span style="color: rgba(0, 0, 0, 1)"> (CoreMap sentence : sentences) { </span><span style="color: rgba(0, 0, 255, 1)">for</span> (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">)) { String word </span>= token.get(CoreAnnotations.TextAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)"> this is the POS tag of the token</span> String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)"> this is the NER label of the token</span> String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); String lemma </span>= token.get(CoreAnnotations.LemmaAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); String result</span>=word+" "+pos+" "+lemma+" "+<span style="color: rgba(0, 0, 0, 1)">ne; list.add(result); } } </span><span style="color: 
rgba(0, 0, 255, 1)">return</span><span style="color: rgba(0, 0, 0, 1)"> list; 

    }

}

版权声明


相关文章:

  • Java 运算符 菜鸟教程 2025-10-30 21:02:03
  • java项目实战教程 pdf 2025-10-30 21:02:03
  • java hibernate教程 pdf 2025-10-30 21:02:03
  • java编程思想教程 2025-10-30 21:02:03
  • java struct教程 2025-10-30 21:02:03
  • java微信开发教程 pdf 2025-10-30 21:02:03
  • 像素鸟java教程 2025-10-30 21:02:03
  • java保姆级教程 2025-10-30 21:02:03
  • java模式设计教程 2025-10-30 21:02:03
  • linux java 安装教程 2025-10-30 21:02:03