def fenci (one_string) : for _ in range(len(one_string)): # 去掉所有空格 try : one_string=one_string.replace( " " , "" ) except : break def isAllZh (s) : # 判断是否全是中文 for c in s: if not ( '\u4e00' = c = '\u9fa5' ): retur
def fenci(one_string):
    """Segment a Chinese string with jieba, keeping only all-Chinese tokens.

    ASCII spaces are removed from the input, which is then segmented with
    ``jieba.lcut``. Any token containing a character outside the range
    U+4E00..U+9FA5 is dropped. A token that is longer than three characters,
    or that is longer than one character and has no (or zero) frequency
    according to ``jieba.get_FREQ``, is deleted from jieba's dictionary with
    ``jieba.del_word`` and segmented again; the resulting pieces take its
    place in the output. All other tokens are kept as they are.

    Note that ``jieba.del_word`` is called as a side effect, so jieba's
    dictionary is modified by this function.

    Args:
        one_string: The text to segment.

    Returns:
        list: The retained tokens, in order of appearance.
    """
    # str.replace already removes every occurrence, so one call is enough.
    # Non-str input is handed to jieba unchanged instead of failing here.
    if isinstance(one_string, str):
        one_string = one_string.replace(" ", "")

    def is_all_zh(s):
        """Return True if every character of *s* lies in U+4E00..U+9FA5."""
        return all('\u4e00' <= c <= '\u9fa5' for c in s)

    final_result = []
    for word in jieba.lcut(one_string):
        if not is_all_zh(word):
            continue
        # get_FREQ yields None or 0 for words jieba's dictionary does not
        # know; look it up once and only for multi-character tokens.
        unknown = len(word) > 1 and not jieba.get_FREQ(word)
        if unknown or len(word) > 3:
            # Force jieba to stop treating this token as a single word,
            # then split it again.
            # NOTE(review): this re-split uses jieba.lcut's default HMM
            # setting - confirm that is what is intended here.
            jieba.del_word(word)
            final_result.extend(jieba.lcut(word))
        else:
            final_result.append(word)
    return final_result
# In practice, the results seem to be about the same as with HMM=False (compare the outputs below).
# Print jieba's segmentation of two sample sentences, first with the HMM
# disabled and then with it enabled, so the outputs can be compared.
for use_hmm in (False, True):
    for sentence in ('丰田太省了', '我们中出了一个叛徒'):
        print(jieba.lcut(sentence, HMM=use_hmm))