11.5.3 封装聊天服务类
聊天服务类的主要作用有3个:①获取问答知识表的所有记录,并对其创建索引;②从索引文件中检索匹配指定问题的问答知识;③封装一个提供给外部使用的聊天方法。聊天服务类的实现如下:
- 1 package org.liufeng.course.service;
- 2
- 3 import java.io.File;
- 4 import java.util.List;
- 5 import java.util.Random;
- 6 import org.apache.lucene.document.Document;
- 7 import org.apache.lucene.document.IntField;
- 8 import org.apache.lucene.document.StringField;
- 9 import org.apache.lucene.document.TextField;
- 10 import org.apache.lucene.document.Field.Store;
- 11 import org.apache.lucene.index.IndexReader;
- 12 import org.apache.lucene.index.IndexWriter;
- 13 import org.apache.lucene.index.IndexWriterConfig;
- 14 import org.apache.lucene.queryparser.classic.QueryParser;
- 15 import org.apache.lucene.search.IndexSearcher;
- 16 import org.apache.lucene.search.Query;
- 17 import org.apache.lucene.search.ScoreDoc;
- 18 import org.apache.lucene.search.TopDocs;
- 19 import org.apache.lucene.store.Directory;
- 20 import org.apache.lucene.store.FSDirectory;
- 21 import org.apache.lucene.util.Version;
- 22 import org.liufeng.course.pojo.Knowledge;
- 23 import org.liufeng.course.util.MySQLUtil;
- 24 import org.wltea.analyzer.lucene.IKAnalyzer;
- 25
- 26 /**
- 27 * 聊天服务类
- 28 *
- 29 * @author liufeng
- 30 * @date 2013-12-01
- 31 */
- 32 public class ChatService {
- 33 /**
- 34 * 得到索引存储目录
- 35 *
- 36 * @return WEB-INF/classes/index/
- 37 */
- 38 public static String getIndexDir() {
- 39 // 得到.class文件所在路径(WEB-INF/classes/)
- 40 String classpath = ChatService.class.getResource("/").getPath();
- 41 // 将classpath中的%20替换为空格
- 42 classpath = classpath.replaceAll("%20", " ");
- 43 // 索引存储位置:WEB-INF/classes/index/
- 44 return classpath + "index/";
- 45 }
- 46
- 47 /**
- 48 * 创建索引
- 49 */
- 50 public static void createIndex() {
- 51 // 取得问答知识库中的所有记录
- 52 List<Knowledge> knowledgeList = MySQLUtil.findAllKnowledge();
- 53 Directory directory = null;
- 54 IndexWriter indexWriter = null;
- 55 try {
- 56 directory = FSDirectory.open(new File(getIndexDir()));
- 57 IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_46,
- 58 new IKAnalyzer(true));
- 59 indexWriter = new IndexWriter(directory, iwConfig);
- 60 Document doc = null;
- 61 // 遍历问答知识库创建索引
- 62 for (Knowledge knowledge : knowledgeList) {
- 63 doc = new Document();
- 64 // 对question进行分词存储
- 65 doc.add(new TextField("question", knowledge.getQuestion(),
- Store.YES));
- 66 // 对id、answer和category不分词存储
- 67 doc.add(new IntField("id", knowledge.getId(), Store.YES));
- 68 doc.add(new StringField("answer", knowledge.getAnswer(), Store.YES));
- 69 doc.add(new IntField("category", knowledge.getCategory(), Store.YES));
- 70 indexWriter.addDocument(doc);
- 71 }
- 72 indexWriter.close();
- 73 directory.close();
- 74 } catch (Exception e) {
- 75 e.printStackTrace();
- 76 }
- 77 }
- 78
- 79 /**
- 80 * 从索引文件中根据问题检索答案
- 81 *
- 82 * @param content
- 83 * @return Knowledge
- 84 */
- 85 @SuppressWarnings("deprecation")
- 86 private static Knowledge searchIndex(String content) {
- 87 Knowledge knowledge = null;
- 88 try {
- 89 Directory directory = FSDirectory.open(new File(getIndexDir()));
- 90 IndexReader reader = IndexReader.open(directory);
- 91 IndexSearcher searcher = new IndexSearcher(reader);
- 92 // 使用查询解析器创建Query
- 93 QueryParser questParser = new QueryParser(Version.LUCENE_46,
- 94 "question", new IKAnalyzer(true));
- 95 Query query = questParser.parse(QueryParser.escape(content));
- 96 // 检索得分最高的文档
- 97 TopDocs topDocs = searcher.search(query, 1);
- 98 if (topDocs.totalHits > 0) {
- 99 knowledge = new Knowledge();
- 100 ScoreDoc[] scoreDoc = topDocs.scoreDocs;
- 101 for (ScoreDoc sd : scoreDoc) {
- 102 Document doc = searcher.doc(sd.doc);
- 103 knowledge.setId(doc.getField("id").numericValue().intValue());
- 104 knowledge.setQuestion(doc.get("question"));
- 105 knowledge.setAnswer(doc.get("answer"));
- 106
- 107 knowledge.setCategory(doc.getField("category")
- .numericValue().intValue());
- 108 }
- 109 }
- 110 reader.close();
- 111 directory.close();
- 112 } catch (Exception e) {
- 113 knowledge = null;
- 114 e.printStackTrace();
- 115 }
- 116 return knowledge;
- 117 }
- 118
- 119 /**
- 120 * 聊天方法(根据question返回answer)
- 121 *
- 122 * @param openId 用户的OpenID
- 123 * @param createTime 消息创建时间
- 124 * @param question 用户上行的问题
- 125 * @return answer
- 126 */
- 127 public static String chat(String openId, String createTime, String question) {
- 128 String answer = null;
- 129 int chatCategory = 0;
- 130 Knowledge knowledge = searchIndex(question);
- 131 // 找到匹配项
- 132 if (null != knowledge) {
- 133 // 笑话
- 134 if (2 == knowledge.getCategory()) {
- 135 answer = MySQLUtil.getJoke();
- 136 chatCategory = 2;
- 137 }
- 138 // 上下文
- 139 else if (3 == knowledge.getCategory()) {
- 140 // 判断上一次的聊天类别
- 141 int category = MySQLUtil.getLastCategory(openId);
- 142 // 如果是笑话,本次继续回复笑话给用户
- 143 if (2 == category) {
- 144 answer = MySQLUtil.getJoke();
- 145 chatCategory = 2;
- 146 } else {
- 147 answer = knowledge.getAnswer();
- 148 chatCategory = knowledge.getCategory();
- 149 }
- 150 }
- 151 // 普通对话
- 152 else {
- 153 answer = knowledge.getAnswer();
- 154 // 如果答案为空,根据知识id从问答知识分表中随机获取一条
- 155 if ("".equals(answer))
- 156 answer = MySQLUtil.getKnowledSub(knowledge.getId());
- 157 chatCategory = 1;
- 158 }
- 159 }
- 160 // 未找到匹配项
- 161 else {
- 162 answer = getDefaultAnswer();
- 163 chatCategory = 0;
- 164 }
- 165 // 保存聊天记录
- 166 MySQLUtil.saveChatLog(openId, createTime, question, answer, chatCategory);
- 167 return answer;
- 168 }
- 169
- 170 /**
- 171 * 随机获取一个默认的答案
- 172 *
- 173 * @return
- 174 */
- 175 private static String getDefaultAnswer() {
- 176 String []answer = {
- 177 "要不我们聊点别的?",
- 178 "恩?你到底在说什么呢?",
- 179 "没有听懂你说的,能否换个说法?",
- 180 "虽然不明白你的意思,但我却能用心去感受",
- 181 "听得我一头雾水,阁下的知识真是渊博呀,膜拜~",
- 182 "真心听不懂你在说什么,要不你换种表达方式如何?",
- 183 "哎,我小学语文是体育老师教的,理解起来有点困难哦",
- 184 "是世界变化太快,还是我不够有才?为何你说话我不明白?"
- 185 };
- 186 return answer[getRandomNumber(answer.length)];
- 187 }
- 188
- 189 /**
- 190 * 随机生成 0~length-1 之间的某个值
- 191 *
- 192 * @return int
- 193 */
- 194 private static int getRandomNumber(int length) {
- 195 Random random = new Random();
- 196 return random.nextInt(length);
- 197 }
- 198 }
上述代码的主要说明如下。
第65~69行:在createIndex()方法中,将knowledge表的4个字段id、question、answer和category都存储到索引中。其中,id、answer和category这3个字段原样(不分词)存储在索引中,而question字段经过分词后存储,因此聊天时是根据question字段进行检索的。
第127~168行:chat()方法是聊天机器人的核心业务逻辑。接收到用户发送的消息时,首先从索引中检索是否有匹配的问答知识,如果没有,随机返回一条默认的回复;如果有,则进一步判断问答知识的类型。如果类型为2(笑话),就从joke表中随机查询一条笑话;如果类型为3(上下文),则判断上一条聊天是否与笑话有关,如果是,继续返回笑话,否则直接返回该问答知识的答案;其他情况按普通对话(类型1)处理,此时还需要判断当前问答知识是否对应多个答案:如果answer为空,就根据知识id从问答知识分表中随机获取一条答案。最后,无论是哪种情况,都会保存本次聊天记录。
第175~187行:getDefaultAnswer()方法定义了8条默认答案,当机器人不能应答时,随机返回其中一条。