package com.nunu.ai.nlp.controller;
import edu.stanford.nlp.coref.CorefCoreAnnotations; import edu.stanford.nlp.coref.data.CorefChain; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.StanfordCoreNLP; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations; import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.TreeCoreAnnotations; import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.PropertiesUtils; import edu.stanford.nlp.util.StringUtils; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController;
import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Properties;
/
- created with IDEA *
- @author:huqm
- @date:2020/12/7
- @time:16:20 <p> *
- </p> */ @RestController @RequestMapping(”/test”) public class TestController {
@RequestMapping(“getStr”) public List<String> getStr(String text){
</span><span style="color: rgba(0, 0, 255, 1)">return</span><span style="color: rgba(0, 0, 0, 1)"> segInCh(text);} public List<String> segInCh(String text) {
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">载入properties 文件// StanfordCoreNLP pipline = new StanfordCoreNLP(“StanfordCoreNLP-chinese.properties”);
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">1.2 自定义功能 (1) </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">StanfordCoreNLP的各个组件(annotator)按“tokenize(分词), ssplit(断句), pos(词性标注), lemma(词元化), </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)"> ner(命名实体识别), parse(语法分析), dcoref(同义词分辨)”顺序进行处理。// Properties properties = new Properties(); // properties.setProperty(“annotators”, “tokenize, ssplit, pos, lemma, ner, parse, dcoref”); // StanfordCoreNLP pipline = new StanfordCoreNLP(properties);
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">自定义功能(2) 自己在项目中建一个properties 文件,然后在文件中设置模型属性,可以参考1中的配置文件// String[] args = new String[]{“-props”, “properies/CoreNLP-Seg-CH.properties”}; // Properties properties = StringUtils.argsToProperties(args); // StanfordCoreNLP pipline = new StanfordCoreNLP(properties);
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">自定义功能(3)</span> StanfordCoreNLP pipline = <span style="color: rgba(0, 0, 255, 1)">new</span><span style="color: rgba(0, 0, 0, 1)"> StanfordCoreNLP(PropertiesUtils.asProperties( </span>"annotators", "tokenize,ssplit,pos,lemma,ner,parse,dcoref"<span style="color: rgba(0, 0, 0, 1)">, </span>"ssplit.isOneSentence", "true"<span style="color: rgba(0, 0, 0, 1)">, </span>"tokenize.language", "zh"<span style="color: rgba(0, 0, 0, 1)">, </span>"segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz"<span style="color: rgba(0, 0, 0, 1)">, </span>"segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese"<span style="color: rgba(0, 0, 0, 1)">, </span>"segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz"<span style="color: rgba(0, 0, 0, 1)">, </span>"segment.sighanPostProcessing", "true"<span style="color: rgba(0, 0, 0, 1)"> )); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">创建一个解析器,传入的是需要解析的文本</span> Annotation annotation = <span style="color: rgba(0, 0, 255, 1)">new</span><span style="color: rgba(0, 0, 0, 1)"> Annotation(text); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">解析</span>pipline.annotate(annotation);
</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">根据标点符号,进行句子的切分,每一个句子被转化为一个CoreMap的数据结构,保存了句子的信息()</span> List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">从CoreMap 中取出CoreLabel List ,打印</span> ArrayList<String> list = <span style="color: rgba(0, 0, 255, 1)">new</span> ArrayList<><span style="color: rgba(0, 0, 0, 1)">(); </span><span style="color: rgba(0, 0, 255, 1)">for</span><span style="color: rgba(0, 0, 0, 1)"> (CoreMap sentence : sentences) { </span><span style="color: rgba(0, 0, 255, 1)">for</span> (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">)) { String word </span>= token.get(CoreAnnotations.TextAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)"> this is the POS tag of the token</span> String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)"> this is the NER label of the token</span> String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); String lemma </span>= token.get(CoreAnnotations.LemmaAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); String result</span>=word+" "+pos+" "+lemma+" "+<span style="color: rgba(0, 0, 0, 1)">ne; list.add(result); } } </span><span style="color: rgba(0, 0, 255, 
1)">return</span><span style="color: rgba(0, 0, 0, 1)"> list;}
}
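// Copyright notice:
// This article was collected from the web; all images and text remain the property of their original authors. Please get in touch to request removal in case of infringement.
// Article URL: https://www.bianchenghao6.com/h6javajc/15662.html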