feat: add morning_rating & qt_rating info

Branch: main
Author: jackluson, 4 years ago
Parent: 6a390c5774 · Commit: 695903d39a

@@ -16,6 +16,7 @@ from fund_info_crawler import FundSpider
 from lib.mysnowflake import IdWorker
 from time import sleep, time
 import pymysql
+import pandas
 connect = pymysql.connect(host='127.0.0.1', user='root',
                           password='rootroot', db='fund_work', charset='utf8')
 cursor = connect.cursor()
@@ -58,17 +59,24 @@ if __name__ == '__main__':
     page_start = 0
     error_funds = []
     output_catch_head = '代码' + ',' + '晨星专属号' + ',' + '名称' + ',' + \
-        '类型' + '股票总仓位' + '页码' + '备注' + '\n'
+        '类型' + ',' + '股票总仓位' + ',' + '页码' + ',' + '备注' + '\n'
     # write the CSV header rows
     result_dir = './output/'
     if page_start == 0:
         with open(result_dir + 'fund_morning_season_catch.csv', 'w+') as csv_file:
             csv_file.write(output_catch_head)
     output_catch_error = '代码' + ',' + '晨星专属号' + ',' + '名称' + ',' + \
-        '类型' + '页码' + '备注' + '\n'
+        '类型' + ',' + '页码' + ',' + '备注' + '\n'
     if page_start == 0:
         with open(result_dir + 'fund_morning_season_error.csv', 'w+') as csv_file:
             csv_file.write(output_catch_error)
+    df = pandas.read_csv(
+        result_dir + 'fund_morning_season_error.csv', usecols=[0, 2, 4])
+    fund_list = df.values.tolist()
+    # print(len(df[df['代码'].astype(str).str.contains('10535')]))
+    # print(df[df['代码'].astype(str).str.contains('10535')]
+    #       ['股票总仓位'].values)
     def crawlData(start, end):
         chrome_driver = login()
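The retry pass re-reads the error CSV written above: usecols=[0, 2, 4] keeps the 代码 (code), 名称 (name) and 页码 (page) columns, and fund_list becomes a plain list of rows. A minimal sketch of the resulting shape (values made up):

    import pandas
    df = pandas.read_csv('./output/fund_morning_season_error.csv', usecols=[0, 2, 4])
    fund_list = df.values.tolist()
    # e.g. [['005404', 'Example Fund A', 12], ...] -- one [code, name, page] row per failed fund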
@@ -103,19 +111,21 @@ if __name__ == '__main__':
                 lock.acquire()
                 error_funds.append(each_fund.fund_code)
                 fund_infos = [each_fund.fund_code, each_fund.morning_star_code,
-                              each_fund.fund_name, each_fund.fund_cat, page_start, '页面跳转有问题']
+                              each_fund.fund_name, record[3], page_start, '页面跳转有问题']
                 with open(result_dir + 'fund_morning_season_error.csv', 'a') as csv_file:
                     output_line = ', '.join(str(x) for x in fund_infos) + '\n'
                     csv_file.write(output_line)
                 lock.release()
                 continue
-            each_fund.get_fund_manager_info()
-            each_fund.get_fund_season_info()
+            # each_fund.get_fund_manager_info()
+            each_fund.get_fund_morning_rating()
+            # each_fund.get_fund_season_info()
+            continue
             if each_fund._is_trigger_catch == True:
                 lock.acquire()
                 fund_infos = [each_fund.fund_code, each_fund.morning_star_code,
-                              each_fund.fund_name, each_fund.fund_cat,
+                              each_fund.fund_name, record[3],
                               each_fund.stock_position['stock_total_position'],
                               page_start, each_fund._catch_detail]
                 with open(result_dir + 'fund_morning_season_catch.csv', 'a') as csv_file:
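The acquire()/release() pairs above serialize appends to the shared CSV files across the crawler threads. Written with the lock as a context manager, the release also happens when the write raises; a small sketch, assuming lock is a threading.Lock shared by the workers (append_row is a hypothetical helper, not part of this commit):

    from threading import Lock

    lock = Lock()

    def append_row(path, fields):
        # serialize appends coming from concurrent crawler threads
        with lock:  # released automatically, even if write() raises
            with open(path, 'a') as csv_file:
                csv_file.write(', '.join(str(x) for x in fields) + '\n')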
@@ -135,7 +145,7 @@ if __name__ == '__main__':
     threaders = []
     start = time()
     step_num = 2500
-    for i in range(3):
+    for i in range(1):
         print(i * step_num, (i+1) * step_num)
         t = Thread(target=crawlData, args=(
             i * step_num, (i+1) * step_num))
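Each worker crawls one step_num-sized slice of the fund list, and range(1) restricts this run to the first 2500 funds. The start/join calls fall outside this hunk; presumably they follow the usual pattern:

    for t in threaders:
        t.start()
    for t in threaders:
        t.join()  # wait for every slice before measuring elapsed time
    print('cost time:', time() - start)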

@@ -11,7 +11,7 @@ Copyright (c) 2020 Camel Lu
 import re
 from time import sleep
 from bs4 import BeautifulSoup
-from utils import parse_cookiestr, set_cookies, login_site
+from utils import parse_cookiestr, set_cookies, login_site, get_star_count
 from selenium.common.exceptions import NoSuchElementException
@@ -41,6 +41,8 @@ class FundSpider:
         self.stock_position = dict()  # total stock position, top-ten holdings
         self.risk_assessment = dict()  # standard deviation, risk coefficient, Sharpe ratio
         self.risk_statistics = dict()  # alpha, beta, R-squared
+        self.risk_rating = dict()  # risk rating -- 2, 3, 5 and 10 year
+        self.morning_star_rating = dict()  # Morningstar rating -- 3, 5 and 10 year
         # top-ten holdings info
         self.ten_top_stock_list = []  # the fund's ten largest stock holdings
         # handle the redirect to the fund detail page
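After a successful crawl the two new dicts are keyed by rating horizon in years, roughly as follows (illustrative values):

    self.morning_star_rating  # e.g. {3: '4', 5: '5', 10: '3'} -- int keys
    self.risk_rating          # e.g. {'2': '2', '3': '3', '5': '2', '10': '3'} -- str keys

Note that the key types differ (int for the Morningstar stars, str for the risk grades), so downstream readers have to match each convention.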
@@ -188,6 +190,64 @@ class FundSpider:
             # raise  # re-raise the exception; left commented so errors are swallowed
             return None
+    def get_fund_morning_rating(self):
+        try:
+            qt_el = self._chrome_driver.find_element_by_id('qt_star')
+            # relative XPath, so the lookup stays inside #qt_star
+            # rather than scanning the whole page
+            rating_3_src = qt_el.find_element_by_xpath(
+                "li[@class='star3']/img").get_attribute('src')
+            rating_5_src = qt_el.find_element_by_xpath(
+                "li[@class='star5']/img").get_attribute('src')
+            rating_10_src = qt_el.find_element_by_xpath(
+                "li[@class='star10']/img").get_attribute('src')
+            rating_3 = get_star_count(rating_3_src)
+            rating_5 = get_star_count(rating_5_src)
+            rating_10 = get_star_count(rating_10_src)
+            print(rating_3, rating_5, rating_10)
+            self.morning_star_rating[3] = rating_3
+            self.morning_star_rating[5] = rating_5
+            self.morning_star_rating[10] = rating_10
+            print(self.morning_star_rating)
+        except NoSuchElementException:
+            self._is_trigger_catch = True
+            print('error_fund_info:', self.fund_code,
+                  '-', self.morning_star_code)
+            # screenshot path for debugging the missing element
+            file_name = './abnormal/morning_rating-' + \
+                self.fund_code + "-no_such_element.png"
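get_star_count is imported from utils, but its body falls outside this diff. A plausible sketch, assuming the star images encode the count as the digit in front of stars.gif, like the qt-rating images parsed below (purely an assumption; the real helper may decode the image differently):

    import re

    def get_star_count(img_src):
        # hypothetical helper: pull the star count out of an image URL
        # such as '.../4stars.gif'; None when the pattern is absent
        match = re.search(r"(\d)stars\.gif$", img_src)
        return match.group(1) if match else None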
+    # risk rating -- 2, 3, 5 and 10 year horizons
+    def get_fund_qt_rating(self):
+        try:
+            qt_el = self._chrome_driver.find_element_by_id('qt_rating')
+            # relative XPath into //*[@id="qt_rating"]/li[n]/img
+            rating_2_src = qt_el.find_element_by_xpath(
+                "li[5]/img").get_attribute('src')
+            rating_3_src = qt_el.find_element_by_xpath(
+                "li[6]/img").get_attribute('src')
+            rating_5_src = qt_el.find_element_by_xpath(
+                "li[7]/img").get_attribute('src')
+            rating_10_src = qt_el.find_element_by_xpath(
+                "li[8]/img").get_attribute('src')
+            print(rating_3_src, rating_5_src, rating_10_src)
+            # findall has no capture group, so it returns the full match
+            # (e.g. '3stars.gif'); [0][0] keeps the leading digit
+            rating_2 = re.findall(r"\d(?:stars\.gif)$", rating_2_src)[0][0]
+            rating_3 = re.findall(r"\d(?:stars\.gif)$", rating_3_src)[0][0]
+            rating_5 = re.findall(r"\d(?:stars\.gif)$", rating_5_src)[0][0]
+            rating_10 = re.findall(r"\d(?:stars\.gif)$", rating_10_src)[0][0]
+            print(rating_2, rating_3, rating_5, rating_10)
+            self.risk_rating['2'] = rating_2
+            self.risk_rating['3'] = rating_3
+            self.risk_rating['5'] = rating_5
+            self.risk_rating['10'] = rating_10
+            print(self.risk_rating)
+        except NoSuchElementException:
+            self._is_trigger_catch = True
+            print('error_fund_info:', self.fund_code,
+                  '-', self.morning_star_code)
+            # screenshot path for debugging the missing element
+            file_name = './abnormal/qt_rating-' + self.fund_code + "-no_such_element.png"
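Since the pattern anchors on stars.gif at the end of the URL, only the digit directly before it survives the [0][0] indexing; a quick check with a made-up URL:

    import re
    src = 'http://quote.example.com/img/3stars.gif'  # illustrative URL
    print(re.findall(r"\d(?:stars\.gif)$", src)[0][0])  # -> '3'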
     def get_fund_season_info(self):
         # investment style
         self.investname_style = self.get_element_text_by_class_name(

@@ -101,7 +101,7 @@ def login_site(chrome_driver, site_url, redirect_url=None):
     password = chrome_driver.find_element_by_id('pwdValue')
     check_code = chrome_driver.find_element_by_id('txtCheckCode')
     username.send_keys('18219112108@163.com')
-    password.send_keys('xxxx')
+    password.send_keys('w780880')
     count = 1
     flag = True
     while count < 10 and flag:
@@ -133,3 +133,21 @@ def login_site(chrome_driver, site_url, redirect_url=None):
     if count > 10:
         return False
     return True
+
+
+def parse_csv(datafile):
+    data = []
+    with open(datafile, "r") as f:
+        header = f.readline().split(",")  # read the header row
+        counter = 0
+        for line in f:
+            if counter == 10:
+                break  # keep only the first ten data rows
+            fields = line.split(",")
+            entry = {}
+            for i, value in enumerate(fields):
+                entry[header[i].strip()] = value.strip()  # strip whitespace and newlines
+            data.append(entry)
+            counter += 1
+    return data
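parse_csv returns at most the first ten data rows as dicts keyed by the header fields, so it pairs naturally with the CSVs the crawler writes; for example (path and fields from the files above):

    rows = parse_csv('./output/fund_morning_season_catch.csv')
    print(rows[0]['代码'], rows[0]['名称'])  # code and name of the first cached fund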
