[Update] Project deployment instructions

This commit is contained in:
luhuibo 2020-03-08 10:54:28 +08:00
parent a6548650cc
commit 35f08e1322
11 changed files with 5402 additions and 249 deletions


@ -0,0 +1,160 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''=================================================
@IDE PyCharm
@Author LuckyHuibo
@Date 2019/10/24 18:16
@Desc
=================================================='''
import jieba
import numpy as np
import collections
from sklearn import feature_extraction
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
def split_sentence(text, punctuation_list='!?。!?'):
"""
将文本段安装标点符号列表里的符号切分成句子将所有句子保存在列表里
"""
sentence_set = []
    inx_position = 0  # index just past the last sentence boundary
    char_position = 0  # moving character pointer
for char in text:
char_position += 1
if char in punctuation_list:
next_char = list(text[inx_position:char_position + 1]).pop()
if next_char not in punctuation_list:
sentence_set.append(text[inx_position:char_position])
inx_position = char_position
if inx_position < len(text):
sentence_set.append(text[inx_position:])
    sentence_with_index = {i: sent for i, sent in enumerate(sentence_set)}
return sentence_set, sentence_with_index
def get_tfidf_matrix(sentence_set, stop_word):
corpus = []
for sent in sentence_set:
sent_cut = jieba.cut(sent)
sent_list = [word for word in sent_cut if word not in stop_word]
sent_str = ' '.join(sent_list)
corpus.append(sent_str)
vectorizer = CountVectorizer()
transformer = TfidfTransformer()
tfidf = transformer.fit_transform(vectorizer.fit_transform(corpus))
# word=vectorizer.get_feature_names()
tfidf_matrix = tfidf.toarray()
return np.array(tfidf_matrix)
def get_sentence_with_words_weight(tfidf_matrix):
sentence_with_words_weight = {}
for i in range(len(tfidf_matrix)):
sentence_with_words_weight[i] = np.sum(tfidf_matrix[i])
    max_weight = max(sentence_with_words_weight.values())  # min-max normalization
min_weight = min(sentence_with_words_weight.values())
for key in sentence_with_words_weight.keys():
x = sentence_with_words_weight[key]
sentence_with_words_weight[key] = (x - min_weight) / (max_weight - min_weight)
return sentence_with_words_weight
def get_sentence_with_position_weight(sentence_set):
sentence_with_position_weight = {}
total_sent = len(sentence_set)
for i in range(total_sent):
sentence_with_position_weight[i] = (total_sent - i) / total_sent
return sentence_with_position_weight
def similarity(sent1, sent2):
    """
    Cosine similarity between two TF-IDF vectors; the 1e-6 in the denominator
    avoids division by zero for all-zero vectors.
    """
    return np.sum(sent1 * sent2) / (1e-6 + np.sqrt(np.sum(sent1 * sent1)) *
                                    np.sqrt(np.sum(sent2 * sent2)))
def get_similarity_weight(tfidf_matrix):
sentence_score = collections.defaultdict(lambda: 0.)
for i in range(len(tfidf_matrix)):
score_i = 0.
for j in range(len(tfidf_matrix)):
score_i += similarity(tfidf_matrix[i], tfidf_matrix[j])
sentence_score[i] = score_i
    max_score = max(sentence_score.values())  # min-max normalization
min_score = min(sentence_score.values())
for key in sentence_score.keys():
x = sentence_score[key]
sentence_score[key] = (x - min_score) / (max_score - min_score)
return sentence_score
def ranking_base_on_weigth(sentence_with_words_weight,
sentence_with_position_weight,
sentence_score, feature_weight=[1, 1, 1]):
sentence_weight = collections.defaultdict(lambda: 0.)
for sent in sentence_score.keys():
sentence_weight[sent] = feature_weight[0] * sentence_with_words_weight[sent] + \
feature_weight[1] * sentence_with_position_weight[sent] + \
feature_weight[2] * sentence_score[sent]
sort_sent_weight = sorted(sentence_weight.items(), key=lambda d: d[1], reverse=True)
return sort_sent_weight
def get_summarization(sentence_with_index, sort_sent_weight, topK_ratio=0.3):
topK = int(len(sort_sent_weight) * topK_ratio)
print('topK:{0}'.format(topK))
summarization_sent = sorted([sent[0] for sent in sort_sent_weight[:topK]])
summarization = []
for i in summarization_sent:
summarization.append(sentence_with_index[i])
summary = ''.join(summarization)
return summary
if __name__ == '__main__':
# test_text = '../../data/training17.txt'
# with open(test_text, 'r', encoding='utf-8') as f:
# text = f.read()
text = '''网易娱乐7月21日报道 林肯公园主唱查斯特·贝宁顿Chester Bennington于今天早上在洛杉矶帕洛斯弗迪斯的一个私人庄园自缢身亡年仅41岁。此消息已得到洛杉矶警方证实。
  洛杉矶警方透露Chester的家人正在外地度假Chester独自在家上吊地点是家里的二楼一说是一名音乐公司工作人员来家里找他时发现了尸体也有人称是佣人最早发现其死亡
  林肯公园另一位主唱麦克·信田确认了Chester Bennington自杀属实并对此感到震惊和心痛称稍后官方会发布声明Chester昨天还在推特上转发了一条关于曼哈顿垃圾山的新闻粉丝们纷纷在该推文下留言不相信Chester已经走了
  外媒猜测Chester选择在7月20日自杀的原因跟他极其要好的朋友Soundgarden(声音花园)乐队以及Audioslave乐队主唱Chris Cornell有关因为7月20日是Chris Cornell的诞辰而Chris Cornell于今年5月17日上吊自杀享年52岁Chris去世后Chester还为他写下悼文
  对于Chester的自杀亲友表示震惊但不意外因为Chester曾经透露过想自杀的念头他曾表示自己童年时被虐待导致他医生无法走出阴影也导致他长期酗酒和嗑药来疗伤目前洛杉矶警方仍在调查Chester的死因
  据悉Chester与毒品和酒精斗争多年年幼时期曾被成年男子性侵导致常有轻生念头Chester生前有过2段婚姻育有6个孩子
  林肯公园在今年五月发行了新专辑多一丝曙光One More Light成为他们第五张登顶Billboard排行榜的专辑而昨晚刚刚发布新单Talking To MyselfMV'''
stop_word = []
    # this stopword list includes many additional Chinese stopwords
with open('../../data/stopWordList.txt', 'r', encoding='utf-8') as f:
for line in f.readlines():
stop_word.append(line.strip())
sentence_set, sentence_with_index = split_sentence(text, punctuation_list='!?。!?')
tfidf_matrix = get_tfidf_matrix(sentence_set, stop_word)
sentence_with_words_weight = get_sentence_with_words_weight(tfidf_matrix)
sentence_with_position_weight = get_sentence_with_position_weight(sentence_set)
sentence_score = get_similarity_weight(tfidf_matrix)
sort_sent_weight = ranking_base_on_weigth(sentence_with_words_weight,
sentence_with_position_weight,
sentence_score, feature_weight=[1, 1, 1])
summarization = get_summarization(sentence_with_index, sort_sent_weight, topK_ratio=0.3)
print('summarization:\n', summarization)


@ -0,0 +1,303 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''=================================================
@IDE PyCharm
@Author LuckyHuibo
@Date 2019/10/23 18:29
@Desc
=================================================='''
from gensim.models import KeyedVectors
import numpy as np
from textrank4zh import TextRank4Keyword, TextRank4Sentence
from pyltp import SentenceSplitter
import pickle
import re
import jieba
import operator
from functools import reduce
from gensim.models import LdaModel
from gensim.corpora import Dictionary
import gc
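# NOTE: Myconfig, used below to resolve resource paths ('frequency.txt', 'stopwords.txt', 'vec.kv'),
# is assumed to come from the project's configuration module; it is not imported in this snippet.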
class SentenceEmbedding:
    # Sentence embedding helper (weighted word-vector averaging, SIF-style)
def __init__(self):
self.word_frequence = self.__get_word_frequence()
    def get_sentences_vec(self, model_wv, sent_list):
        # Build sentence vectors as frequency-weighted averages of the word vectors
        a = 0.001
        row = model_wv.vector_size
        col = len(sent_list)
        sent_mat = np.zeros((row, col))
        for i, sent in enumerate(sent_list):
            length = len(sent)
            if length == 0: continue
            sent_vec = np.zeros(row)
            for word in sent:
                pw = self.word_frequence.get(word, 0)  # skip words with no known frequency
                if pw == 0: continue
                w = a / (a + pw)
                # print(w)
                try:
                    vec = np.array(model_wv[word])
                    sent_vec += w * vec
                except KeyError:
                    pass  # word not in the embedding vocabulary
            sent_mat[:, i] += sent_vec
            sent_mat[:, i] /= length
        # Remove the common component: subtract the projection onto the first left
        # singular vector (projecting onto the full basis u * u.T would zero the matrix)
        # print(sent_mat.shape)
        sent_mat = np.mat(sent_mat)
        u, s, vh = np.linalg.svd(sent_mat)
        u1 = u[:, 0]
        sent_mat = sent_mat - u1 * u1.T * sent_mat
        return sent_mat
def __get_word_frequence(self):
        # No stopword filtering here; words without an entry in the frequency table are
        # simply skipped when the sentence vectors are computed
path = Myconfig.get_path('frequency.txt')
assert path
with open(path, 'rb') as f:
word_frequence = pickle.load(f)
return word_frequence
    # Cosine similarity between two vectors
def cos_similarity(self, v1, v2):
assert isinstance(v1, np.ndarray)
assert isinstance(v2, np.ndarray)
        # the input vectors must have the same dimension
if len(v1) != len(v2):
return 0
if np.linalg.norm(v2) == 0 or np.linalg.norm(v1) == 0:
return 0
return np.vdot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    # Similarity of each column of the sentence matrix to the first column
def __calcu_similarity(self, sent_mat):
assert (isinstance(sent_mat, np.ndarray) or isinstance(sent_mat, np.matrix))
        # computed via dot products
first = np.array(sent_mat[:, 0]).flatten()
col = sent_mat.shape[1]
sims = []
for i in range(1, col):
vec = np.array(sent_mat[:, i]).flatten()
sims.append(self.cos_similarity(first, vec))
return sims
    # Similarity result: the similarity of each input sentence to the first one
def get_similarity_result(self, model_wv, sent_list):
sent_mat = self.get_sentences_vec(model_wv, sent_list)
sim = self.__calcu_similarity(sent_mat)
return sim
# def test(sens, sim):
# print('##################################')
# index = list(np.argsort(sim))
# index.reverse()
# for i in index:
# print(sim[i], sens[i])
class Summarization:
def __init__(self):
self.position_re_weight = True
self.Sen_Embedding = SentenceEmbedding()
self.stopwords = self.__get_stopwords()
        fname = Myconfig.get_path('vec.kv')  # path of the word-vector model
assert fname
self.model_wv = KeyedVectors.load(fname, mmap='r')
def __get_stopwords(self):
path = Myconfig.get_path('stopwords.txt')
stopwords = []
with open(path, encoding='GBK') as f:
line = f.readline()
while line != '':
stopwords.append(line.strip('\n'))
line = f.readline()
stopwords.append(' ')
return set(stopwords)
def __get_keyword(self, string):
tr4w = TextRank4Keyword()
tr4w.analyze(text=string, lower=True, window=4)
keyword_items = tr4w.get_keywords(10, word_min_len=2)
        # normalize the weights
keyword_items = sorted(keyword_items, key=lambda x: x.weight)
over_length = keyword_items[-1].weight
for wp in keyword_items:
wp.weight /= over_length
return keyword_items
    # Split into sentences with regular expressions
def __split_sentence(self, string):
pattern = re.compile('[。,,.?!""“”]')
pattern1 = re.compile('\w+?([。,,.?!""“”])')
flags = pattern1.findall(string)
sentences = pattern.sub('***', string).split('***')
sentences = [sen for sen in sentences if sen != '']
if (len(sentences) > len(flags)): flags.append('.')
        # Drop sentences shorter than 4 characters; they are usually transitions that would
        # only add noise to sentence extraction
filter_index = [i for i in range(len(sentences)) if len(sentences[i]) >= 4]
sentences = [sentences[i] for i in filter_index]
flags = [flags[i] for i in filter_index]
return sentences, flags
    # Split into sentences with the pyltp model
def __cut_sentence(self, string):
"""@string contain many sentence"""
        sents = SentenceSplitter.split(string)  # split into sentences
sents = [sen for sen in sents if len(sen) > 4]
return sents, None
def __get_tokens(self, sentences):
sen_tokens = []
for i, sen in enumerate(sentences):
sen_tokens.append([])
words = jieba.cut(sen)
for wp in words:
if wp not in self.stopwords:
sen_tokens[i].append(wp)
return sen_tokens
    # Extract the document topics.
    # The document topics could be compared with the topics of the summary; if the similarity
    # is too low, the individual weights could be re-tuned and the summary re-extracted.
    # Topic comparison on single sentences works poorly with LDA because there are too few words.
def __theme_re_weight(self, tokens):
dictionary = Dictionary(tokens)
corpus = [dictionary.doc2bow(text) for text in tokens]
lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2, passes=20)
topic = []
topic.append(lda.show_topic(topicid=0, topn=8))
topic.append(lda.show_topic(topicid=1, topn=8))
return topic
def __knn_soft(self, sim):
window = 2
wight = np.array([0.1, 0.125, 0.5, 0.125, 0.1])
sim = [sim[0]] * window + sim + [sim[-1]] * window
sim = np.array(sim)
sim = [np.dot(sim[i - window:i + window + 1], wight)
for i in range(window, len(sim) - window)]
return sim
    # Weight adjustment that takes the title into account
def __title_re_weight(self, sim, sim_title):
sim = np.array(sim)
sim_title = np.array(sim_title)
p = 0.7
sim = p * sim + (1 - p) * sim_title
return list(sim)
    # Weight adjustment that takes the extracted keywords into account
def __keywords_re_weight(self, keywords, sim, tokens):
for wp in keywords:
for i, token in enumerate(tokens):
if wp.word in token:
                    sim[i] = sim[i] + 0.02 * wp.weight  # boost sentences that contain a keyword
return sim
    # Weight adjustment for the leading sentence
def __startend_re_weight(self, sents, sim):
if (len(sents[0]) > 20):
sim[0] = sim[0] + 0.1
return sim
def get_summrazation(self, string, num, title=None):
# sentences, flags = self.__split_sentence(string)
sentences, flags = self.__cut_sentence(string)
tokens = self.__get_tokens(sentences)
tokens_all = reduce(operator.add, tokens)
new_tokens = [tokens_all] + tokens
sim = self.Sen_Embedding.get_similarity_result(self.model_wv, new_tokens)
# test(sentences, sim) # testpoint
assert len(sim) == len(tokens)
keywords = self.__get_keyword(string)
# print(keywords)
        # re-weight once using the keywords
sim = self.__keywords_re_weight(keywords, sim, tokens)
# test(sentences, sim) # testpoint
        # if a title is given, re-weight once using the title
if title:
title_tokens = self.__get_tokens([title])
new_tokens = title_tokens + tokens
sim_title = self.Sen_Embedding.get_similarity_result(self.model_wv, new_tokens)
sim = self.__title_re_weight(sim, sim_title)
        # re-weight once based on the position of the leading sentence
if self.position_re_weight:
sim = self.__startend_re_weight(sentences, sim)
# test(sentences, sim) # testpoint
        sim = self.__knn_soft(sim)  # knn-style smoothing over neighbouring sentences
# test(sentences, sim) # testpoint
assert len(sim) == len(tokens)
index = list(np.argsort(sim))
        index = index[-num:]  # keep the num highest-scoring sentences
        index.sort()  # restore original sentence order
        # reattach the punctuation marks
abstract = []
if flags:
for i in index:
abstract.append(sentences[i])
abstract.append(flags[i])
else:
abstract = [sentences[i] for i in index]
topic = self.__theme_re_weight(tokens)
keywords = [(wp.word, wp.weight) for wp in keywords]
# for wp in keywords:
# result['keywords'].append({'cat': 'a', 'name': wp.word, 'value': 30, 'pro':wp.weight})
return ''.join(abstract), keywords, topic
def data_format(abstract, keywords, topic):
keywords = sorted(keywords, key=lambda x: x[1])
length_range = keywords[-1][1]
result = {}
result['keywords'] = []
for i, wp in enumerate(keywords):
result['keywords'].append({'cat': i,
'name': wp[0],
'value': round(10 + 50 * wp[1] / length_range, 2),
'pro': round(float(wp[1]), 4)})
result['summarization'] = abstract
topic_new = []
for tp in topic:
temp = []
for wp in tp:
temp.append({"name": wp[0], 'value': round(float(wp[1]), 4)})
topic_new.append(temp)
result['topics'] = topic_new
return result
class My_Summrazation:
    # External interface class that bundles all functionality of this module
def __init__(self):
self.Summ = Summarization()
def get_results(self, text, num, title=None):
# try:
return data_format(*self.Summ.get_summrazation(text, num, title))
# except:
# return None
def release(self):
del self.Summ.model_wv
gc.collect()
if __name__ == "__main__":
pass
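For reference, a minimal usage sketch of My_Summrazation (hypothetical driver code, not part of the commit): it assumes the vec.kv, frequency.txt and stopwords.txt resources resolved through Myconfig are in place, and text stands for any Chinese article, e.g. the news sample used in the TF-IDF script above.
```python
summ = My_Summrazation()
text = '...'  # placeholder: a Chinese news article to be summarized
result = summ.get_results(text, num=3, title=None)  # abstract built from the top-3 sentences
if result:
    print(result['summarization'])
    for kw in result['keywords']:
        print(kw['name'], kw['value'], kw['pro'])
summ.release()  # drop the loaded word vectors and reclaim memory
```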


@ -0,0 +1,30 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''=================================================
@IDE PyCharm
@Author Valuebai
@Date 2019/11/15 17:27
@Desc
=================================================='''
import pkuseg
'''
Title

If the text has a title, the title can help a great deal.
Previously, each sentence was scored by its similarity to the article as a whole,
i.e. a similarity/distance computation between every sub-sentence and the full text.
Now we can also take the title's embedding,
so each sentence's score becomes a combination of its similarity to the whole article
and its similarity to the title.
'''
# Illustrative pseudocode: get_sentence_vec, cosine and sub_sen_n are placeholders
# assumed to be provided elsewhere (e.g. the sentence embedding module above).
title = 'some words'
content = 'more and more words'
sentence_vec_title = get_sentence_vec(title)
sentence_vec_content = get_sentence_vec(content)
# For a sub-sentence sub_sen_n, the similarity used to be
#     cosine(get_sentence_vec(sub_sen_n), sentence_vec_content)
# now it can be:
p = 0.5
sen_vec = get_sentence_vec(sub_sen_n)
similarity = p * cosine(sen_vec, sentence_vec_title) + (1 - p) * cosine(sen_vec, sentence_vec_content)
# Of course p, and the way p and cosine are combined, can be varied; p and 1 - p form a linear
# combination, but any other relation could be used. Choose whatever works for you.
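A self-contained numpy sketch of the same combination (the three vectors below are stand-ins for whatever get_sentence_vec would return; p = 0.5 as above):
```python
import numpy as np

def cosine(v1, v2):
    # cosine similarity with a small epsilon to avoid division by zero
    return float(np.dot(v1, v2) / (1e-6 + np.linalg.norm(v1) * np.linalg.norm(v2)))

# stand-ins for get_sentence_vec(title), get_sentence_vec(content), get_sentence_vec(sub_sen_n)
sentence_vec_title = np.array([0.2, 0.7, 0.1])
sentence_vec_content = np.array([0.3, 0.5, 0.2])
sen_vec = np.array([0.25, 0.6, 0.15])

p = 0.5
similarity = p * cosine(sen_vec, sentence_vec_title) + (1 - p) * cosine(sen_vec, sentence_vec_content)
print(round(similarity, 4))
```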

270
README.md

@ -1,33 +1,40 @@
# Text-Auto-Summarization (automatic text summarization)
![Languages](https://img.shields.io/badge/Languages-Python3.6-green)
![Languages](https://img.shields.io/badge/Languages-Python3.6.5-blue)
![Build](https://img.shields.io/badge/Build-passing-brightgreen.svg)
![License](https://img.shields.io/badge/License-MIT-orange.svg)
![Contributions](https://img.shields.io/badge/Contributions-welcome-ff69b4.svg)
<p align="center">
<!-- quick in-page navigation -->
<a href="#quick-start">Quick Start</a>
## Project demo
<a href="# projects">Projects</a> :•
<a href="## Textrank 和 Pagerank">Textrank and Pagerank</a>
<a href="## 核心算法详解采用Extraction">Core algorithm walkthrough (extraction-based)</a>
http://111.229.74.215:8188/TextSummarization/
<a href="#deploy">Deploy</a> :•
<a href="## Ptyhon创建虚拟环境">Creating a Python virtual environment</a>
<a href="## Requirements">Requirements</a>
<a href="## linux部署指南">Linux deployment guide</a>
<a href="## linux上杀死gunicorn的进程">Killing gunicorn processes on Linux</a>
<a href="## linux根据端口号查找项目路径方法">Finding a project path by port number on Linux</a>
![demo picture](./static/images/demo.jpg)
<a href="# 前端页面">Front-end page</a>
<!--a href="http://developers.tron.network">Documentation</a-->
<!--a href="#resource">Resource</a-->
</p>
## Project deployment
# quick-start
### Standard steps
1. Clone the project locally: git clone https://github.com/Valuebai/Text-Auto-Summarization.git
2. Set up the Python environment (this project uses Python 3.6.5)
3. Install the pip dependencies: pip install -r requirements.txt
4. Run the project to check it works: python run.py
5. Once it runs correctly, on Linux you can keep it running in the background with:
```
sh run.sh    # or: . run.sh
```
### Create a dedicated virtual environment for the project (optional)
[Creating Python virtual environments on Windows/Linux](https://blog.csdn.net/luhuibo318/article/details/94011917)
### Deployment guide for a newly purchased CentOS 7 cloud server
1. [【Linux】CentOS essential commands & must-read setup for a new cloud server](https://github.com/Valuebai/awesome-python-io/issues/1)
2. [Five ways to deploy a Python + Flask project on Linux](https://blog.csdn.net/luhuibo318/article/details/102688154)
【Core techniques】to be incorporated: https://github.com/ZhiWenMo/Autosummarization_self_dis/blob/master/Autosummarization.ipynb
## Current summarization techniques are divided into
1. Extraction (extractive)
@ -236,231 +243,6 @@ jieba.initialize()
# deploy
## Creating a Python virtual environment
### Method 1: the built-in venv module
1. cd into the project directory
2. python -m venv -h shows the help
3. Then:
```
On Linux, from the command line:
$ create the default environment: python3 -m venv my_venv
$ create an environment for a specific interpreter: python3.6 -m venv my_venv, python2 -m venv my_venv (the interpreter must be on the system PATH)
$ activate it: . my_venv/bin/activate (either . or source)
$ leave it: deactivate
On Windows, run cmd and use the "py" Python launcher with the "-m" switch:
$ create the environment: py -3 -m venv my_venv (or python -m venv my_venv)
$ create an environment for a specific interpreter: py -3.6 -m venv my_venv, py -3.7 -m venv my_venv (the interpreter must be on the system PATH)
$ activate it: my_venv\Scripts\activate.bat
$ leave it: deactivate
Once activated, the prompt is prefixed with <my_venv>, showing that the virtual environment is active.
Install the project:
$ pip install -r requirements.txt
```
### Method 2: create a virtual environment with PyCharm on Windows
1. Install and activate PyCharm
Install it yourself.
Official site: https://www.jetbrains.com/pycharm/
2. Create the virtual environment in PyCharm
Step 1: click New Project
Step 2: select the New environment option
Step 3: click Create
PyCharm automatically sets up a virtual environment for the newly created project.
### Method 3: create a virtual environment with conda
[Common anaconda operations](https://blog.csdn.net/CampusAmour/article/details/83215524)
Start its terminal on Linux with:
$ source ~/anaconda3/bin/activate root
$ anaconda-navigator
- Create a virtual environment: conda create -n env_name python=3.6
- Create it together with required packages: conda create -n env_name numpy matplotlib python=3.6
- Activate the virtual environment:
- Linux: source activate your_env_name
- Windows: activate your_env_name
- Deactivate the virtual environment:
- Linux: source deactivate your_env_name
- Windows: deactivate your_env_name
- Remove a virtual environment: conda remove -n your_env_name --all
- Remove a package: conda remove --name $your_env_name $package_name
Common conda commands:
- List installed packages: conda list
- Install a package: conda install package_name
- List existing virtual environments: conda env list or conda info -e
- Update conda itself: conda update conda
## Requirements
- How to generate requirements.txt:
- Step 1: install the tool: pip install pipreqs
- Step 2: in the project directory, run pipreqs ./ --encoding=utf8 --force (the encoding flag avoids errors with Chinese paths)
- Step 3: after cloning the code, just run pip install -r requirements.txt
- or create a virtual environment first and install into it
@[TOC](Table of contents) # auto-generates a table of contents on CSDN
## Linux deployment guide
### 1. Run the Python script in the background with sh & nohup
- (1) Create the script: vim run.sh
- (2) Add the following line and save: nohup python3 -u run.py > nohup.log 2>&1 &
- (3) Run it: sh run.sh or . run.sh
- References: [The difference between sh, source and . when executing a .sh file](https://www.zengdongwu.com/article3.html) +
[Running Linux commands in the background with & and nohup](https://blog.csdn.net/liuyanfeier/article/details/62422742)
```md
- nohup: short for "no hang up" - the process keeps running after you log out of the account
- With & alone the job goes to the background and frees the console, but as soon as the console is closed (you log out) the job stops; nohup keeps the process running after logout
- python3 -u run.py: run the Python file
- -u forces unbuffered output; without it, a script that sleeps (or similar) can look stalled in the background because its output stays in the buffer
- > nohup.log: redirect the output to nohup.log in the current directory
- 2>&1: send stderr to nohup.log as well
- &: the trailing & runs the command in the background
```
### 2. Deploy the Flask service with gunicorn (recommended for personal projects)
- (1) Create the script: vim gunicorn.sh
- (2) Add the following and save:
- conda activate just_do_it (activate the environment you created on Linux; optional)
- nohup gunicorn -w 4 -b 0.0.0.0:8001 run:app & (without a log file)
- nohup gunicorn -w 4 -b 0.0.0.0:8001 run:app > gunicorn.log 2>&1 & (with a log file)
- (3) Run it: sh gunicorn.sh or . gunicorn.sh
```md
gunicorn must be installed first: pip install gunicorn
In the simplest case, gunicorn -w 4 -b 0.0.0.0:8001 run:app starts a Flask app, where
-w 4 sets the number of worker processes to 4,
-b 127.0.0.1:4000 sets the bind address and port,
run is the Flask entry-point Python file and app is the Flask application instance.
A possible form of run.py is:
# run.py
from flask import Flask
app = Flask(__name__)
Reference articles:
Deploying a Flask service with gunicorn: https://www.jianshu.com/p/fecf15ad0c9a
https://www.cnblogs.com/gaidy/p/9784919.html
```
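For completeness, a minimal runnable run.py sketch along these lines (illustrative only: the route and page text are placeholders; the 0.0.0.0:8188 binding mirrors the app.run call in this repository's run.py):
```python
# run.py - minimal Flask entry point that gunicorn can serve as run:app
from flask import Flask

app = Flask(__name__)

@app.route('/')
def index():
    # placeholder page; the real project renders its summarization UI here
    return 'Text-Auto-Summarization is running'

if __name__ == "__main__":
    # direct launch for development; under gunicorn this block is not executed
    app.run(host='0.0.0.0', port=8188)
```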
### 3. Deploy with the screen command
- Step 1: screen -S yourname - start a new session named yourname
- Step 2: python run.py - the code keeps running on the Linux box even after the shell connection is closed
- Good for quickly deploying when the user base is small (the approach used here)
- For details on screen see: https://www.cnblogs.com/mchina/archive/2013/01/30/2880680.html
```
Kill all matching processes: ps aux|grep <your_process_name>|grep -v grep | awk '{print $2}'|xargs kill -9
https://www.hutuseng.com/article/how-to-kill-all-detached-screen-session-in-linux
```
### 4. Flask + nginx + uWSGI (not recommended: combining Flask with uWSGI brings many hard-to-handle bugs)
- Suited to high-traffic scenarios; see the articles below for details
- https://blog.csdn.net/spark_csdn/article/details/80790929
- https://www.cnblogs.com/Ray-liang/p/4173923.html
- https://blog.csdn.net/daniel_ustc/article/details/9070357
### 5. Flask + nginx + gunicorn (recommended for larger projects)
- Many large companies use this setup in production, hence the recommendation
- Also recommended because combining Flask with uWSGI brings many hard-to-handle bugs
- [Flask + Gunicorn + Nginx deployment](https://www.cnblogs.com/Ray-liang/p/4837850.html)
## Killing gunicorn processes on Linux
**Method 1**
1. netstat -nltp | grep 8188
You should see something like:
tcp 0 0 0.0.0.0:8188 0.0.0.0:* LISTEN 23422/gunicorn: mas
2. kill -9 23422 (replace with your own PID)
**Method 2**
1. Get the gunicorn process tree
```
pstree -ap|grep gunicorn
The output looks like:
Python
| | |-grep,14519 --color=auto gunicorn
| -gunicorn,28097 /usr/local/bin/gunicorn query_site.wsgi:application -c ...
| |-gunicorn,14226 /usr/local/bin/gunicorn query_site.wsgi:application -c ...
| | |-{gunicorn},14229
| | |-{gunicorn},14230
...
```
2. Restart the gunicorn workers
kill -HUP 14226
3. Stop gunicorn
kill -9 28097
## Finding a project's path from its port number on Linux
### 1. When you only know the port number
#### Method 1
**1. Find the process by port number, e.g. 6379**
```
netstat -lnp|grep 6379
```
**2. Use the process ID to look up the program's path**
```
ll /proc/2757
```
This reveals the program's path.
#### Method 2
**1. First find the process by port number**
```
netstat -nltp
or
netstat -nltp | grep python
or
netstat -apn |grep 10010
```
**2. Then look up the project path by process ID**
```
ps -ef |grep 8567
```
**3. If step 2 doesn't reveal the project path, try**
```
lsof -p 8567
```
### 2. If the project is deployed inside Tomcat
If your project runs inside a Tomcat container on Linux, you can locate it with:
```
ps anx|grep tomcat
```
# Front-end page
## Building the front-end page quickly with Flask

Binary file not shown.

File diff suppressed because one or more lines are too long

2
run.py

@ -47,3 +47,5 @@ if __name__ == "__main__":
logger.info('is_dev_mode:{}'.format(is_dev_mode))
# main run
app.run(host='0.0.0.0', port=8188)

BIN
static/images/demo.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 81 KiB

BIN
static/images/home.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB


@ -22,8 +22,11 @@
<body>
<H1 align="center">Welcome to my Home</H1>
<div style="text-align: center;">
<img src="https://user-images.githubusercontent.com/9695113/58942751-f4a5a100-87b0-11e9-9116-915f85c5f65a.jpg"
alt="上海鲜花港 - 郁金香"/>
<img src="./static/images/home.jpg"
alt="god bless you"
width="600px"
height="400px"
/>
</div>
<div class="absoluteCenter">
<H3 align="center">耶和华是我的牧者,我必不致缺乏。</H3>