nlp教程java - 编程好6文档

package com.nunu.ai.nlp.controller;

import edu.stanford.nlp.coref.CorefCoreAnnotations; import edu.stanford.nlp.coref.data.CorefChain; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.pipeline.Annotation; import edu.stanford.nlp.pipeline.StanfordCoreNLP; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations; import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.TreeCoreAnnotations; import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.PropertiesUtils; import edu.stanford.nlp.util.StringUtils; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController;

import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Properties;

created with IDEA *
@author:huqm
@date:2020/12/7
@time:16:20 <p> *

</p> */ @RestController @RequestMapping(”/test”) public class TestController {

@RequestMapping(“getStr”) public List<String> getStr(String text){

</span><span style="color: rgba(0, 0, 255, 1)">return</span><span style="color: rgba(0, 0, 0, 1)"> segInCh(text);

} public List<String> segInCh(String text) {

</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">载入properties 文件

// StanfordCoreNLP pipline = new StanfordCoreNLP(“StanfordCoreNLP-chinese.properties”);

</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">1.2 自定义功能 （1） </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">StanfordCoreNLP的各个组件（annotator）按“tokenize（分词）, ssplit（断句）, pos（词性标注）, lemma（词元化）, </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)"> ner（命名实体识别）, parse（语法分析）, dcoref（同义词分辨）”顺序进行处理。

// Properties properties = new Properties(); // properties.setProperty(“annotators”, “tokenize, ssplit, pos, lemma, ner, parse, dcoref”); // StanfordCoreNLP pipline = new StanfordCoreNLP(properties);

</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">自定义功能(2) 自己在项目中建一个properties 文件，然后在文件中设置模型属性，可以参考1中的配置文件

// String[] args = new String[]{“-props”, “properies/CoreNLP-Seg-CH.properties”}; // Properties properties = StringUtils.argsToProperties(args); // StanfordCoreNLP pipline = new StanfordCoreNLP(properties);

</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">自定义功能(3)</span> StanfordCoreNLP pipline = <span style="color: rgba(0, 0, 255, 1)">new</span><span style="color: rgba(0, 0, 0, 1)"> StanfordCoreNLP(PropertiesUtils.asProperties( </span>"annotators", "tokenize,ssplit,pos,lemma,ner,parse,dcoref"<span style="color: rgba(0, 0, 0, 1)">, </span>"ssplit.isOneSentence", "true"<span style="color: rgba(0, 0, 0, 1)">, </span>"tokenize.language", "zh"<span style="color: rgba(0, 0, 0, 1)">, </span>"segment.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz"<span style="color: rgba(0, 0, 0, 1)">, </span>"segment.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese"<span style="color: rgba(0, 0, 0, 1)">, </span>"segment.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz"<span style="color: rgba(0, 0, 0, 1)">, </span>"segment.sighanPostProcessing", "true"<span style="color: rgba(0, 0, 0, 1)"> )); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">创建一个解析器，传入的是需要解析的文本</span> Annotation annotation = <span style="color: rgba(0, 0, 255, 1)">new</span><span style="color: rgba(0, 0, 0, 1)"> Annotation(text); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">解析</span>

pipline.annotate(annotation);

</span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">根据标点符号，进行句子的切分，每一个句子被转化为一个CoreMap的数据结构，保存了句子的信息()</span> List&lt;CoreMap&gt; sentences = annotation.get(CoreAnnotations.SentencesAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)">从CoreMap 中取出CoreLabel List ,打印</span> ArrayList&lt;String&gt; list = <span style="color: rgba(0, 0, 255, 1)">new</span> ArrayList&lt;&gt;<span style="color: rgba(0, 0, 0, 1)">(); </span><span style="color: rgba(0, 0, 255, 1)">for</span><span style="color: rgba(0, 0, 0, 1)"> (CoreMap sentence : sentences) { </span><span style="color: rgba(0, 0, 255, 1)">for</span> (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">)) { String word </span>= token.get(CoreAnnotations.TextAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)"> this is the POS tag of the token</span> String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); </span><span style="color: rgba(0, 128, 0, 1)">//</span><span style="color: rgba(0, 128, 0, 1)"> this is the NER label of the token</span> String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); String lemma </span>= token.get(CoreAnnotations.LemmaAnnotation.<span style="color: rgba(0, 0, 255, 1)">class</span><span style="color: rgba(0, 0, 0, 1)">); String result</span>=word+" "+pos+" "+lemma+" "+<span style="color: rgba(0, 0, 0, 1)">ne; list.add(result); } } </span><span style="color: rgba(0, 0, 255, 1)">return</span><span style="color: rgba(0, 0, 0, 1)"> list;

}

上一篇： Java 运算符菜鸟教程

下一篇： java微信开发教程 pdf

版权声明：
本文来源网络，所有图片文章版权属于原作者，如有侵权，联系删除。

本文网址：https://www.bianchenghao6.com/h6javajc/15662.html

相关文章：