Using the following code, you can generate a word cloud from any Chinese text. Word segmentation is performed with either jieba or NLPIR2016, and the cloud itself is rendered by WordCloud. By default, 20 new words are automatically discovered in the text and added to the segmenter's dictionary; you can also add words manually or load a user dictionary from a TXT file. The code includes detailed setup instructions and explanatory comments, and if you want to learn more, you can refer to the list of blog posts I provide in Part 2.
# -*- coding: utf-8 -*-
#
# Created: 2017/5/23
# email:
# CSDN:
#
# Build a word cloud from a Chinese text file.  The text is segmented with
# either jieba or NLPIR2016, stop words are removed, and the result is
# rendered by WordCloud (once with default colours, once recoloured from the
# background image).
#
# NOTE(review): this listing was reconstructed from a garbled, machine-
# translated copy of the script.  Lower/upper-case of identifiers, the path
# literals, the WordCloud keyword arguments and the plotting calls were
# restored on a best-effort basis -- verify them against the original post.
# NOTE(review): the script targets Python 2 (reload(sys),
# sys.setdefaultencoding, unicode).  scipy.misc.imread was removed in
# SciPy 1.2, so an old SciPy (with PIL) is required.
from os import path
from scipy.misc import imread
import matplotlib.pyplot as plt
import jieba
from nlpir import *
from ctypes import *
# Optional: load a jieba user dictionary, e.g.
#   jieba.load_userdict("txt/userdict.txt")
# Optional: import an NLPIR user dictionary with ImportUserDict(<path>).
from wordcloud import WordCloud, ImageColorGenerator
import sys

reload(sys)
sys.setdefaultencoding('utf-8')

# Directory of this script.  __file__ is not defined in some interactive
# environments; use path.dirname('.') there instead.
d = path.dirname(__file__)

stopwords = {}
isCN = 1            # 1: run Chinese word segmentation before building the cloud
isJieba = 0         # 0: segment with NLPIR2016, otherwise segment with jieba
isGetNewWords = 1   # 1: add the new words found by NLPIR to the dictionary
number = 20         # how many new words to ask NLPIR for
# NOTE(review): directory/file names below were de-garbled (stray spaces and
# changed case removed); confirm they match the real layout on disk.
back_coloring_path = "img/lz1.jpg"                   # background (mask) image
text_path = 'txt/lztest.txt'                         # text to analyse
font_path = r'D:\Fonts\simkai.ttf'                   # Chinese font for WordCloud
stopwords_path = r'stopwords\stopwords1893.txt'      # stop-word list
imgname1 = "WordCloudDefautColors.png"   # saved image 1 (default colours)
imgname2 = "wordCloudColorsByimg.png"    # saved image 2 (colours taken from the background image)

# Extra words to register with the segmenter.
# NOTE(review): 'Lumingfu' looks like a transliteration introduced by the
# translation; replace it with the actual (Chinese) term.
my_words_list = ['Lumingfu']

# Background image, used as the cloud mask and as the colour source.
back_coloring = imread(path.join(d, back_coloring_path))

# NOTE(review): the argument list was unreadable in the source between
# width=1000 and height=860; mask=back_coloring is inferred, and further
# arguments (e.g. max_words) may have been lost.
wc = WordCloud(font_path=font_path,
               background_color="white",
               mask=back_coloring,
               max_font_size=100,
               random_state=42,
               # Default image size; when a mask image is used the saved
               # image takes the size of the mask instead.
               width=1000, height=860, margin=2,
               )


def add_word(text, number):
    """Register new words with the active segmenter.

    Asks NLPIR for up to ``number`` new words found in ``text`` and, when
    ``isGetNewWords`` is 1, adds them to the user dictionary of the segmenter
    selected by ``isJieba``.  The words in ``my_words_list`` are always added.
    """
    strs1 = getNewWordsByNLPIR(text, number)
    register = AddUserWord if isJieba == 0 else jieba.add_word
    if isGetNewWords == 1:
        for i in strs1:
            register(i)
    for i in my_words_list:
        register(i)


def _read_stopwords():
    """Return the stop words from ``stopwords_path`` as a list of unicode strings."""
    with open(stopwords_path) as f_stop:
        f_stop_text = f_stop.read()
    return unicode(f_stop_text, 'utf-8').split('\n')


def _drop_stopwords(liststr):
    """Remove stop words and one-character tokens from a '/'-separated string.

    Tokens are tested after stripping whitespace but kept unstripped, so the
    leading space produced by the "/ " separator keeps the surviving words
    apart in the returned string.
    """
    # Kept as a list on purpose: in the NLPIR path the tokens are UTF-8 byte
    # strings while the stop words are unicode, and Python 2 set lookups would
    # not match the two for non-ASCII text.
    f_stop_seg_list = _read_stopwords()
    mywordlist = []
    for myword in liststr.split('/'):
        word = myword.strip()
        if len(word) > 1 and word not in f_stop_seg_list:
            mywordlist.append(myword)
    return ''.join(mywordlist)


def jiebaclearText(text):
    """Segment ``text`` with jieba and return it with stop words removed."""
    seg_list = jieba.cut(text, cut_all=False)
    return _drop_stopwords("/ ".join(seg_list))


def getNewWordsByNLPIR(text, number):
    """Return the new words NLPIR discovers in ``text`` as UTF-8 strings.

    The GetNewWords result is split on '#'; for each piece the part before
    the first '/' is taken as the word, and empty words are skipped.
    """
    txt1 = GetNewWords(text, c_int(number), [c_char_p, c_int, c_bool])
    txt5 = []
    for item2 in txt1.split('#'):
        word = item2.encode('utf-8').split('/')[0]
        if word != '':
            txt5.append(word)
    return txt5


def useNLPIR2016(text):
    """Segment ``text`` with NLPIR and return the first field of each result, UTF-8 encoded."""
    return [t[0].encode('utf-8') for t in seg(text)]


def NLPIRclearText(seg_list):
    """Return the NLPIR tokens in ``seg_list`` joined into a string with stop words removed."""
    return _drop_stopwords("/ ".join(seg_list))


# Text to analyse.
with open(path.join(d, text_path)) as f_text:
    text = f_text.read()

if isCN == 1:
    add_word(text, number)
    if isJieba == 0:
        seg_list = useNLPIR2016(text)
        text = NLPIRclearText(seg_list)
        text = unicode(text, encoding='utf8')
    else:
        # The source also showed a one-argument add_word(my_words_list) call
        # here; add_word takes (text, number) and has already run above.
        text = jiebaclearText(text)

# Build the cloud from the whole text.  Alternatively, compute the word
# frequencies yourself and call
#   wc.generate_from_frequencies(txt_freq)
# where txt_freq looks like [('word a', 100), ('word b', 90), ('word c', 83)].
wc.generate(text)
print(text)

# Show and save the cloud with the default colours.
plt.figure()
plt.imshow(wc)
plt.axis("off")
wc.to_file(path.join(d, imgname1))

# Recolour the cloud with the colours of the background image.
image_colors = ImageColorGenerator(back_coloring)
plt.imshow(wc.recolor(color_func=image_colors))
plt.axis("off")

# Show the background image itself for comparison.
# NOTE(review): the cmap argument was truncated in the source; plt.cm.gray is
# assumed.
plt.figure()
plt.imshow(back_coloring, cmap=plt.cm.gray)
plt.axis("off")

# Save the recoloured cloud, then display the figures.
# NOTE(review): plt.show() was not visible in the source; it is added so the
# figures are actually displayed.
wc.to_file(path.join(d, imgname2))
plt.show()