Home 3.Topic and Transformations
Post
Cancel

3. Topics and Transformations

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import logging
# Configure the root logger: INFO-level records and above are emitted,
# each rendered as "<timestamp> : <level name> : <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s',
)


from gensim import corpora,models,similarities
import os
# Locations of the saved dictionary and the Matrix Market corpus.
dictionary_path = './model/dictionary.m'
corpus_path = './model/corpra.mms'

# Both files are loaded below, so both must exist; checking only the
# dictionary would let a missing corpus file surface as an exception.
if os.path.exists(dictionary_path) and os.path.exists(corpus_path):
    dictionary = corpora.Dictionary.load(dictionary_path)
    print(dictionary)
    corpus = corpora.MmCorpus(corpus_path)
    print('load dictionary and corpus done')

    # TF-IDF: fit on the loaded corpus, then wrap the corpus so documents
    # are re-weighted when iterated.
    tfidf_model = models.TfidfModel(corpus=corpus, normalize=True)
    tfidf_corpus = tfidf_model[corpus]

    # LSI with 4 topics, trained on the TF-IDF vectors.
    lsi_model = models.LsiModel(
        corpus=tfidf_corpus, id2word=dictionary, num_topics=4
    )
    # The model was trained in TF-IDF space, so documents must be projected
    # from that same space (bow -> tfidf -> lsi), not from raw counts.
    lsi_corpus = lsi_model[tfidf_corpus]
    print(lsi_model.show_topics(2))
    # Further usage: lsi_model.add_documents(...) extends the model with new
    # documents, and lsi_model[vector] projects a single document.

    # Random Projections, 500 dimensions.
    rp_model = models.RpModel(tfidf_corpus, num_topics=500)

    # Latent Dirichlet Allocation, 100 topics.
    # NOTE(review): LDA and HDP are count-based models and are usually fed
    # the bag-of-words corpus; confirm that TF-IDF input is intended here.
    lda_model = models.LdaModel(
        tfidf_corpus, id2word=dictionary, num_topics=100
    )

    # Hierarchical Dirichlet Process (no topic count is passed).
    hdp_model = models.HdpModel(tfidf_corpus, id2word=dictionary)
else:
    print('file not exsit')
This post is licensed under CC BY 4.0 by the author.