feat: change get star level methods

main
jackluson 2 years ago
parent f155559f29
commit c2b29271a8

@ -33,28 +33,28 @@ def main():
9.股票持仓基金汇总\n \
10.高分基金\n \
输入")
if input_value == '1' or input_value == '快照':
page_index = 486
if input_value == '1':
page_index = 0
get_fund_list(page_index) # 执行申万行业信息入库
elif input_value == '2' or input_value == '新基入库':
elif input_value == '2':
acquire_fund_base() # 执行行业股票信息入库
elif input_value == '3' or input_value == "季度信息":
elif input_value == '3':
acquire_fund_quarter()
elif input_value == '4' or input_value == "基金状态归档":
elif input_value == '4':
fund_supplement = FundSupplement()
# 补充基金清算维度信息
fund_supplement.update_archive_status()
elif input_value == '5' or input_value == "组合持仓明细":
elif input_value == '5':
get_special_fund_code_holder_stock_detail()
elif input_value == '6' or input_value == "基金持仓股排名":
elif input_value == '6':
all_stocks_rank()
elif input_value == '7' or input_value == "基金重仓股Top100":
elif input_value == '7':
t100_stocks_rank()
elif input_value == '8' or input_value == "股票持仓基金明细":
elif input_value == '8':
all_stock_holder_detail()
elif input_value == '9' or input_value == "股票持仓基金汇总":
elif input_value == '9':
calculate_quarter_fund_count()
elif input_value == '10' or input_value == "高分基金":
elif input_value == '10':
output_high_score_funds()
else:
print('输入有误')

@ -10,3 +10,5 @@ Pillow==8.3.1
python-dotenv==0.19.0
cryptography==37.0.4
lxml==4.9.1
scikit-image==0.19.3
sewar==0.4.5

@ -10,7 +10,7 @@ Copyright (c) 2020 Camel Lu
'''
from threading import Lock, current_thread
from time import sleep
from time import sleep, time
from pprint import pprint
from fund_info.crawler import FundSpider
from fund_info.api import FundApier
@ -39,8 +39,7 @@ def get_total_asset(fund_code, platform):
def acquire_fund_quarter():
lock = Lock()
each_fund_query = FundQuery()
record_total = each_fund_query.select_quarter_fund_total() # 获取记录条数
print('record_total', record_total)
idWorker = IdWorker()
result_dir = './output/'
fund_csv = FundCSV(result_dir)
@ -52,6 +51,7 @@ def acquire_fund_quarter():
chrome_driver = login_morning_star(login_url, False)
page_start = start
page_limit = 10
try:
while(page_start < end):
results = each_fund_query.select_quarter_fund(
page_start, page_limit)
@ -61,6 +61,8 @@ def acquire_fund_quarter():
# 001811 中欧明睿新常态混合A
each_fund = FundSpider(
record[0], record[1], record[2], chrome_driver)
each_fund.set_found_data(record[3])
is_error_page = each_fund.go_fund_url()
# 是否能正常跳转到基金详情页没有的话写入csv,退出当前循环
if is_error_page == True:
@ -111,7 +113,6 @@ def acquire_fund_quarter():
first_manager_id = manager_item['manager_id']
if first_manager_start_date == None:
first_manager_start_date = manager_item['manager_start_date']
manager_assoc_data = {
'quarter_index': quarter_index,
'manager_start_date': manager_item['manager_start_date'],
@ -121,12 +122,13 @@ def acquire_fund_quarter():
manager_assoc = ManagerAssoc(**manager_assoc_data)
manager_assoc.upsert()
# fund_insert.insert_fund_manger_info(manager_dict)
init_total_asset = each_fund.total_asset
quarterly_dict = {
'id': snow_flake_id,
# 'id': snow_flake_id,
'quarter_index': each_fund.quarter_index,
'fund_code': each_fund.fund_code,
'investname_style': each_fund.investname_style,
'total_asset': each_fund.total_asset,
# 'total_asset': each_fund.total_asset,
'manager_id': first_manager_id, # 暂时存第一个基金经理信息
'manager_start_date': first_manager_start_date, # 暂时存第一个基金经理信息
'three_month_retracement': each_fund.three_month_retracement,
@ -149,7 +151,7 @@ def acquire_fund_quarter():
'morning_star_rating_5': each_fund.morning_star_rating.get(5),
'morning_star_rating_10': each_fund.morning_star_rating.get(10),
}
fund_insert.fund_quarterly_info(quarterly_dict)
# 入库十大股票持仓
stock_position_total = each_fund.stock_position.get(
'total', '0.00')
@ -171,9 +173,9 @@ def acquire_fund_quarter():
stock_dict[portion_key] = temp_stock['stock_portion']
market_key = prefix + 'market'
stock_dict[market_key] = temp_stock['stock_market']
fund_insert.fund_stock_info(stock_dict)
# 获取同类基金,再获取同类基金的总资产
if each_fund.fund_name.endswith('A'):
if each_fund.fund_name.endswith('A') or each_fund.fund_name.endswith('B') or each_fund.fund_name.endswith('C'):
similar_name = each_fund.fund_name[0:-1]
results = each_fund_query.select_similar_fund(
similar_name) # 获取查询的所有记录
@ -181,7 +183,19 @@ def acquire_fund_quarter():
for i in range(0, len(results)):
item = results[i]
item_code = item[0]
if item_code == each_fund.fund_code:
continue
print("item_code", item_code, platform )
total_asset = get_total_asset(item_code, platform)
init_total_asset = init_total_asset - total_asset
manager_assoc_data = {
'quarter_index': quarter_index,
'manager_start_date': manager_item['manager_start_date'],
'manager_id': manager_item['manager_id'],
'fund_code': item_code
}
manager_assoc = ManagerAssoc(**manager_assoc_data)
manager_assoc.upsert()
quarterly_dict['fund_code'] = item_code
quarterly_dict['total_asset'] = total_asset
quarterly_dict['id'] = snow_flake_id + i + 1
@ -192,13 +206,44 @@ def acquire_fund_quarter():
stock_dict['id'] = snow_flake_id + i + 1
# 入库
fund_insert.fund_stock_info(stock_dict)
quarterly_dict['fund_code'] = each_fund.fund_code
quarterly_dict['total_asset'] = init_total_asset
quarterly_dict['id'] = snow_flake_id
fund_insert.fund_quarterly_info(quarterly_dict)
if float(stock_position_total) > 0:
stock_dict['fund_code'] = each_fund.fund_code
stock_dict['id'] = snow_flake_id
fund_insert.fund_stock_info(stock_dict)
# pprint(fundDict)
page_start = page_start + page_limit
print(current_thread().getName(), 'page_start', page_start)
sleep(3)
except(BaseException):
chrome_driver.close()
raise BaseException
chrome_driver.close()
thread_count = 1
thread_count = 4
# for count in range(6):
total_start_time = time()
# record_total = each_fund_query.select_quarter_fund_total() # 获取记录条数
# print("record_total", record_total)
# bootstrap_thread(crawlData, record_total, thread_count)
for i in range(3):
print("i", i)
start_time = time()
record_total = each_fund_query.select_quarter_fund_total() # 获取记录条数
print('record_total', record_total)
try:
bootstrap_thread(crawlData, record_total, thread_count)
except:
end_time = time()
print("耗时: {:.2f}".format(end_time - start_time))
end_time = time()
print("耗时: {:.2f}".format(end_time - start_time))
total_end_time = time()
print("total耗时: {:.2f}".format(total_end_time - total_start_time))
exit()
if __name__ == '__main__':

@ -28,15 +28,12 @@ from utils.login import login_morning_star
connect_instance = connect()
cursor = connect_instance.cursor()
'''
判读是否当前页一致没有的话切换上一页下一页操作
'''
def text_to_be_present_in_element(locator, text, next_page_locator):
""" An expectation for checking if the given text is present in the
specified element.
locator, text
locator, text -- 判读是否当前页一致没有的话切换上一页下一页操作
"""
def _predicate(driver):
try:
@ -63,7 +60,6 @@ def get_fund_list(page_index):
page_count = 25 # 晨星固定分页数
page_total = math.ceil(int(chrome_driver.find_element_by_xpath(
'/html/body/form/div[8]/div/div[4]/div[3]/div[2]/span').text) / page_count)
result_dir = './output/'
output_head = '代码' + ',' + '晨星专属号' + ',' + '名称' + ',' + \
'类型' + ',' + '三年评级' + ',' + '五年评级' + ',' + '今年回报率' + '\n'
@ -119,13 +115,19 @@ def get_fund_list(page_index):
# 晨星基金专属晨星码
morning_star_code_list.append(current_morning_code)
name_list.append(tds_text[1].find_all('a')[0].string)
# print("name_list", name_list)
# 基金分类
fund_cat.append(tds_text[2].string)
# 三年评级
rating = get_star_count(tds_text[3].find_all('img')[0]['src'])
# rating = None
rating_3_img_ele = tds_text[3].find_all('img')[0]
rating_3_src = rating_3_img_ele['src']
rating = get_star_count(rating_3_src, current_morning_code, rating_3_img_ele)
fund_rating_3.append(rating)
# 5年评级
rating = get_star_count(tds_text[4].find_all('img')[0]['src'])
rating_5_img_ele = tds_text[4].find_all('img')[0]
rating_5_src = rating_5_img_ele['src']
rating = get_star_count(rating_5_src, current_morning_code, rating_5_img_ele)
fund_rating_5.append(rating)
# 今年以来回报(%)
return_value = tds_nume[3].string if tds_nume[3].string != '-' else None

Binary file not shown.

After

Width:  |  Height:  |  Size: 435 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 747 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 665 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 682 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 805 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 589 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

@ -14,7 +14,7 @@ sys.path.append('./src')
from sqlalchemy.orm import Session
from models.manager import Manager, ManagerAssoc
from models.quarter import Quarter
from models.var import prefix, ORM_Base, engine
from models.var import engine
session = Session(engine)

@ -2,7 +2,6 @@
import pymysql
from config.env import env_db_host, env_db_name, env_db_user, env_db_password, env_db_stock_name
def connect():
connect = pymysql.connect(
host=env_db_host, user=env_db_user, password=env_db_password, db=env_db_name, charset='utf8')

@ -79,11 +79,11 @@ class FundApier:
pprint(res_json)
print('code:1', self.fund_code)
else:
pprint(res.raw)
print('url:', url)
print('code:2', self.fund_code)
raise('中断')
except:
pprint(res.raw)
print('url:', url)
print('code:3', self.fund_code)
raise('中断')
@ -135,6 +135,7 @@ class FundApier:
'fundcode': self.fund_code,
}
res = requests.post(url, headers=headers, data=payload)
# print("res", res)
res.encoding = "utf-8"
time.sleep(1)
try:
@ -169,5 +170,5 @@ class FundApier:
if __name__ == '__main__':
fund_api = FundApier('000421', end_date='2021-05-31',)
# fund_api.get_analyse_info_zh()
fund_api.get_analyse_info_zh()
# print("fund_api", fund_api)

@ -9,8 +9,8 @@ Copyright (c) 2020 Camel Lu
'''
import re
from time import sleep
from bs4 import BeautifulSoup
from datetime import datetime, timedelta, date
from time import sleep, time
from utils.index import get_star_count, get_quarter_index, get_last_quarter_str
from selenium.common.exceptions import NoSuchElementException
@ -51,6 +51,8 @@ class FundSpider:
# 十大持仓信息
self.ten_top_stock_list = [] # 股票十大持仓股信息
def set_found_data(self, date):
self.found_date = date
# 处理基金详情页跳转
def go_fund_url(self, cookie_str=None):
# self.login_morning_star(cookie_str)
@ -156,10 +158,8 @@ class FundSpider:
manager['manager_id'] = manager_id
manager['manager_start_date'] = manager_ele.find_element_by_xpath(
"li[@class='col1']/i").text[0:10]
manager['brife'] = manager_ele.find_element_by_xpath(
"li[@class='col2']").text
self.manager_list.append(manager)
except NoSuchElementException:
@ -173,18 +173,42 @@ class FundSpider:
def get_fund_morning_rating(self):
try:
qt_el = self._chrome_driver.find_element_by_id('qt_star')
rating_3_src = qt_el.find_element_by_xpath(
"//li[@class='star3']/img").get_attribute('src')
rating_5_src = qt_el.find_element_by_xpath(
"//li[@class='star5']/img").get_attribute('src')
rating_10_src = qt_el.find_element_by_xpath(
"//li[@class='star10']/img").get_attribute('src')
rating_3 = get_star_count(rating_3_src)
rating_5 = get_star_count(rating_5_src)
rating_10 = get_star_count(rating_10_src)
rating_3_img_ele = qt_el.find_element_by_xpath(
"//li[@class='star3']/img")
rating_3_src = rating_3_img_ele.get_attribute('src')
rating_5_img_ele = qt_el.find_element_by_xpath(
"//li[@class='star5']/img")
rating_5_src = rating_5_img_ele.get_attribute('src')
rating_10_img_ele = qt_el.find_element_by_xpath(
"//li[@class='star10']/img")
rating_10_src = rating_10_img_ele.get_attribute('src')
delta = timedelta(days=3 * 365)
date_now = date.today()
is_more = False
if date_now - delta > self.found_date:
is_more = True
rating_3 = get_star_count(rating_3_src, self.fund_code, rating_3_img_ele)
self.morning_star_rating[3] = rating_3
if is_more == False:
return
delta = timedelta(days=5 * 365)
is_more = False
if date_now - delta > self.found_date:
is_more = True
rating_5 = get_star_count(rating_5_src, self.fund_code, rating_5_img_ele)
self.morning_star_rating[5] = rating_5
if is_more == False:
return
delta = timedelta(days=10 * 365)
if date_now - delta > self.found_date:
rating_10 = get_star_count(rating_10_src, self.fund_code, rating_10_img_ele)
self.morning_star_rating[10] = rating_10
except NoSuchElementException:
self._is_trigger_catch = True
print('error_fund_info:', self.fund_code,
@ -225,8 +249,9 @@ class FundSpider:
def get_fund_season_info(self):
# 总资产 TODO: 增加一个数据更新时间field
self.total_asset = self.get_element_text_by_class_name(
total_asset = self.get_element_text_by_class_name(
"asset", 'qt_base')
self.total_asset = float(total_asset) if total_asset else 0
# 投资风格
self.investname_style = self.get_element_text_by_class_name(
'sbdesc', 'qt_base')

@ -22,9 +22,12 @@ class FundSupplement:
def update_archive_status(self):
fund_query = FundQuery()
each_fund_update = FundUpdate()
start = 0
funds = fund_query.select_quarter_fund(0, 15000)
print("funds's len", len(funds))
for fund_item in funds:
for index in range(start, len(funds)):
# print("index", index)
fund_item = funds[index]
fund_code = fund_item[0]
fund_api = FundApier(fund_code, platform='zh_fund')
fund_api.get_analyse_info_zh()

@ -9,5 +9,6 @@ Copyright (c) 2022 Camel Lu
'''
print('--models init--');
import fund
import sys
sys.path.append('./src')
import models.fund

@ -19,7 +19,7 @@ ORM_Base = get_orm_base()
prefix = 'fund_morning_'
engine = get_engine(echo=True)
engine = get_engine(echo=False)
# class ORM_Base(Base):
# def __init__(self, **kwargs) -> None:

@ -82,10 +82,8 @@ class FundQuery(BaseQuery):
'普通债券型', '普通债券型(封闭)', '普通债券', '普通债券(封闭)', '普通债券型基金','普通债券型基金(封闭)', '信用债', '信用债(封闭)','目标日期', '商品 - 贵金属', '商品 - 其它' ) \
AND t.found_date <= %s \
AND t.is_archive = 0 \
AND t.fund_name NOT LIKE '%%C' \
AND t.fund_name NOT LIKE '%%B' \
AND t.fund_code NOT IN( SELECT fund_code FROM fund_morning_quarter as b \
WHERE b.quarter_index = %s AND b.stock_position_total != 0)"
WHERE b.quarter_index = %s)"
return condition
# 筛选出要更新的基金季度性信息的基金(B,C类基金除外因为B、C基金大部分信息与A类一致)的总数
@ -100,8 +98,9 @@ class FundQuery(BaseQuery):
@lock_process
def select_quarter_fund(self, page_start, page_limit):
sql = "SELECT t.fund_code,\
t.morning_star_code, t.fund_name, t.fund_cat \
t.morning_star_code, t.fund_name, t.found_date, t.fund_cat \
FROM fund_morning_base as t " + self.get_select_quarter_condition() + " LIMIT %s, %s;"
self.cursor.execute(
sql, [self.quarter_date, self.quarter_index, page_start, page_limit]) # 执行sql语句
return self.cursor.fetchall() # 获取查询的所有记录
@ -225,8 +224,9 @@ class FundQuery(BaseQuery):
t.morning_star_code, t.fund_name \
FROM fund_morning_base as t \
LEFT JOIN fund_morning_snapshot as f ON f.fund_code = t.fund_code \
WHERE t.fund_name LIKE %s \
AND t.fund_name NOT LIKE '%%A';"
WHERE t.fund_name LIKE %s;"
# AND t.fund_name NOT LIKE '%%A';"
self.cursor.execute(sql_similar, [similar_name + '%'])
results = self.cursor.fetchall() # 获取查询的所有记录
return results

@ -2,14 +2,41 @@
import time
import datetime
import os
import numpy as np
import requests
from PIL import Image
from skimage import io
from sewar.full_ref import uqi, sam
import re
from threading import Thread, Lock
import pandas as pd
from openpyxl import load_workbook
requests.adapters.DEFAULT_RETRIES = 10 # 增加重连次数
s = requests.session()
s.keep_alive = False # 关闭多余连接
dir = os.getcwd() + '/src/'
img_dir = dir + 'img/'
samples_dir = dir + 'assets/samples/'
def use_sewar_get_star_level(img_path):
    """Classify a star-rating screenshot by comparing it with sample images.

    The image at ``img_path`` is compared against every file found in
    ``samples_dir`` using two sewar full-reference metrics (UQI and SAM).
    The first sample that satisfies both thresholds decides the result.

    Args:
        img_path: Path of the screenshot to classify.

    Returns:
        The single character that precedes the 4-character extension of the
        matching sample's filename (``filename[-5:-4]``), as a ``str``.

    Raises:
        ValueError: If no sample image is similar enough to the screenshot.
    """
    uqi_min = 0.98  # a match needs UQI strictly above this value
    sam_max = 0.11  # ... and SAM strictly below this value

    img1 = io.imread(fname=img_path)
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # "first matching sample" deterministic across runs and platforms.
    for filename in sorted(os.listdir(samples_dir)):
        img2 = io.imread(fname=samples_dir + filename)
        # NOTE(review): sewar's metrics are understood to reject inputs of
        # different shapes; skip such a sample instead of letting one
        # odd-sized file abort the whole comparison -- confirm against sewar.
        if img1.shape != img2.shape:
            continue
        if uqi(img1, img2) > uqi_min and sam(img1, img2) < sam_max:
            return filename[-5:-4]

    print('img_path', img_path)
    # The original raised a bare string, which is itself a TypeError in
    # Python 3 and hid the real reason from the caller.
    raise ValueError("{} 图片比较失败".format(img_path))
def lock_process(func):
lock = Lock()
@ -28,27 +55,57 @@ def debug(func):
return wrapper # 返回包装过函数
def get_star_count(morning_star_url):
import numpy as np
import requests
from PIL import Image
def get_star_count_with_sewar(fund_code, img_ele):
    """Screenshot a rating image element and classify it by image similarity.

    The screenshot is stored under ``<cwd>/star-record/<YYYY-MM-DD>/`` with a
    name built from the current timestamp and ``fund_code``, then handed to
    ``use_sewar_get_star_level``.

    Args:
        fund_code: Fund code, used only to name the screenshot file.
        img_ele: Object exposing ``screenshot(path)`` that returns a truthy
            value on success (presumably a Selenium WebElement -- confirm
            against callers).

    Returns:
        Whatever ``use_sewar_get_star_level`` returns for the screenshot.

    Raises:
        RuntimeError: If the element reports that the screenshot failed.
    """
    # Read the clock once so the directory name and the file name cannot
    # disagree when the call straddles a second/day boundary.
    now = time.localtime()
    directory_time = time.strftime("%Y-%m-%d", now)
    picture_time = time.strftime("%Y-%m-%d-%H_%M_%S", now)

    file_dir = os.path.join(os.getcwd(), 'star-record', directory_time)
    try:
        # exist_ok avoids the check-then-create race when several crawler
        # threads reach this point at the same time.
        os.makedirs(file_dir, exist_ok=True)
    except OSError as msg:
        # Best effort, as before: report and let the screenshot step decide.
        print("新建目录失败:%s" % msg)

    code_path = os.path.join(
        file_dir, picture_time + '_' + fund_code + '_' + '_code.png')
    if not img_ele.screenshot(code_path):
        # The original raised a bare string, which is a TypeError in Python 3.
        raise RuntimeError("截图失败")

    # Pause before reading the file back; only needed when a file was written.
    time.sleep(2)
    return use_sewar_get_star_level(code_path)
def get_star_count_with_np(morning_star_url):
    """Identify the star level by exact pixel comparison with reference GIFs.

    Downloads the rating image at ``morning_star_url`` and compares its pixel
    array with ``<cwd>/src/assets/star/star0.gif`` .. ``star5.gif``.

    Args:
        morning_star_url: URL of the rating image to download.

    Returns:
        The index ``i`` (0-5) of the first identical reference image, or
        ``None`` when none of them is identical.

    Raises:
        RuntimeError: ``'请求失败'`` when the download fails (including a
            non-success HTTP status), ``'识别失败'`` when an image cannot be
            opened or decoded. The underlying error is chained as the cause.
    """
    # Local import: the module-level name ``io`` is already bound to
    # ``skimage.io`` in this file, so the stdlib module must not shadow it.
    from io import BytesIO

    try:
        response = requests.get(morning_star_url)
        response.raise_for_status()
    except requests.RequestException as err:
        raise RuntimeError('请求失败') from err

    reference_prefix = os.getcwd() + '/src' + '/assets/star/star'
    try:
        # Decode straight from memory instead of a shared tmp.gif on disk:
        # concurrent crawler threads used to overwrite each other's file.
        # Decoding once here also avoids re-reading it on every iteration.
        downloaded = np.array(Image.open(BytesIO(response.content)))
        for i in range(6):
            reference = np.array(Image.open(reference_prefix + str(i) + '.gif'))
            # array_equal is False on a shape mismatch, where ``==`` followed
            # by ``.all()`` could fail before the remaining levels are tried.
            if np.array_equal(reference, downloaded):
                return i
    except OSError as err:
        raise RuntimeError('识别失败') from err
    return None
def get_star_count(morning_star_url, fund_code, img_ele=None):
    """Return the star level of a rating image.

    When ``img_ele`` is given, the level is first determined through
    ``get_star_count_with_sewar`` (screenshot + similarity). If that fails
    for any ordinary reason, or no element was supplied, the image behind
    ``morning_star_url`` is compared via ``get_star_count_with_np`` instead.

    Args:
        morning_star_url: URL of the rating image, used by the fallback.
        fund_code: Fund code, used to name the screenshot file.
        img_ele: Optional element to screenshot; ``None`` skips the
            screenshot strategy.

    Returns:
        The value produced by whichever strategy succeeded.
        NOTE(review): the two strategies may return different types (a
        filename character vs. a loop index) -- confirm callers accept both.
    """
    if img_ele is not None:
        try:
            return get_star_count_with_sewar(fund_code, img_ele)
        except Exception as err:
            # Catch Exception rather than BaseException so Ctrl-C and
            # SystemExit are not swallowed; report the actual error, not
            # the exception class.
            print("BaseException", repr(err))
            print('图片相似度比较失败')
    return get_star_count_with_np(morning_star_url)
def parse_csv(datafile):

Loading…
Cancel
Save