feat: 🎉 third quarter data crawler

main
jackluson 3 years ago
parent 78a429abe8
commit 926c7a1c79

@ -0,0 +1 @@
代码,晨星专属号,名称,类型,股票总仓位,页码,备注
1 代码 晨星专属号 名称 类型 股票总仓位 页码 备注

@ -1,60 +1,10 @@
absl-py==0.11.0 selenium==3.11.0
astor==0.8.1
autopep8==1.5.4
beautifulsoup4==4.5.3
bs4==0.0.1
cached-property==1.5.2
certifi==2020.12.5
chardet==3.0.4
cycler==0.10.0
et-xmlfile==1.1.0
fake-useragent==0.1.11
gast==0.4.0
google-pasta==0.2.0
grpcio==1.34.0
h5py==3.1.0
idna==2.6
importlib-metadata==3.3.0
joblib==1.0.0
Keras==2.2.4
Keras-Applications==1.0.8
Keras-Preprocessing==1.1.2
kiwisolver==1.3.1
lxml==4.6.2
Markdown==3.3.3
matplotlib==3.3.3
ntplib==0.3.4
numpy==1.19.5
opencv-python==4.5.1.48
openpyxl==3.0.7
pandas==1.1.5
Pillow==8.1.0
protobuf==3.14.0
pycodestyle==2.6.0
PyMySQL==1.0.2 PyMySQL==1.0.2
pyparsing==2.4.7 pandas==1.1.5
pysnowflake==0.1.3
pytesseract==0.3.7
python-dateutil==2.8.1
python-dotenv==0.17.0
pytz==2020.5
PyYAML==5.3.1
requests==2.18.4 requests==2.18.4
scikit-learn==0.24.0 pytesseract==0.3.7
scipy==1.6.0 numpy==1.19.5
selenium==3.11.0 openpyxl==3.0.7
six==1.15.0 beautifulsoup4==4.9.3
sklearn==0.0 Pillow==8.3.1
tensorboard==1.14.0 python-dotenv==0.19.0
tensorflow==1.14.0
tensorflow-estimator==1.14.0
termcolor==1.1.0
threadpoolctl==2.1.0
toml==0.10.2
tornado==6.1
typing-extensions==3.7.4.3
urllib3==1.22
Werkzeug==1.0.1
wrapcache==1.0.8
wrapt==1.12.1
zipp==3.4.0

@ -7,6 +7,9 @@ Author: luxuemin2108@gmail.com
----- -----
Copyright (c) 2021 Camel Lu Copyright (c) 2021 Camel Lu
''' '''
from sql_model.stock_query import StockQuery
from sql_model.fund_query import FundQuery
from utils.index import get_quarter_index, fisrt_match_condition_from_list
import time import time
import datetime import datetime
import re import re
@ -15,9 +18,6 @@ import sys
from pprint import pprint from pprint import pprint
sys.path.append('../') sys.path.append('../')
sys.path.append(os.getcwd() + '/src') sys.path.append(os.getcwd() + '/src')
from utils.index import get_quarter_index, fisrt_match_condition_from_list
from sql_model.fund_query import FundQuery
from sql_model.stock_query import StockQuery
class FundStatistic: class FundStatistic:
@ -53,7 +53,7 @@ class FundStatistic:
code_dict = dict() code_dict = dict()
for result in results: for result in results:
# print(result) # print(result)
totol_asset = result[2] totol_asset = result[2]
for index in range(4, len(result), 3): for index in range(4, len(result), 3):
code = result[index] code = result[index]
name = result[index + 1] # 仅以股票名称为key兼容港股A股 name = result[index + 1] # 仅以股票名称为key兼容港股A股
@ -63,12 +63,14 @@ class FundStatistic:
#print('基金名称', result[1],'基金代码', result[0]) #print('基金名称', result[1],'基金代码', result[0])
continue continue
key = fisrt_match_condition_from_list(list(code_dict), code) key = fisrt_match_condition_from_list(list(code_dict), code)
holder_asset = round(portion * totol_asset / 100, 4) if totol_asset and portion else 0 holder_asset = round(
portion * totol_asset / 100, 4) if totol_asset and portion else 0
if key == None and code and name: if key == None and code and name:
key = str(code) + '-' + str(name) key = str(code) + '-' + str(name)
if(key in code_dict and code != None): if(key in code_dict and code != None):
count = code_dict[key]['count'] + 1 count = code_dict[key]['count'] + 1
holder_asset = code_dict[key]['holder_asset'] + holder_asset holder_asset = code_dict[key]['holder_asset'] + \
holder_asset
code_dict[key] = { code_dict[key] = {
'count': count, 'count': count,
'holder_asset': holder_asset 'holder_asset': holder_asset
@ -79,7 +81,7 @@ class FundStatistic:
'holder_asset': holder_asset 'holder_asset': holder_asset
} }
filer_dict = dict() filer_dict = dict()
for key, value in code_dict.items(): # for (key,value) in girl_dict.items() 这样加上括号也可以 for key, value in code_dict.items(): # for (key,value) in girl_dict.items() 这样加上括号也可以
if value['count'] > filter_count and key != None: if value['count'] > filter_count and key != None:
filer_dict[key] = value filer_dict[key] = value
@ -105,14 +107,14 @@ class FundStatistic:
) )
code_dict = dict() code_dict = dict()
for result in results: for result in results:
#print(result) # print(result)
fund_info = { fund_info = {
'基金代码': result[0], '基金代码': result[0],
'基金名称': result[1], '基金名称': result[1],
'基金金额': result[2], '基金金额': result[2],
'股票总仓位': result[3], '股票总仓位': result[3],
} }
totol_asset = result[2] totol_asset = result[2]
for index in range(4, len(result), 3): for index in range(4, len(result), 3):
code = result[index] code = result[index]
name = result[index + 1] name = result[index + 1]
@ -123,7 +125,8 @@ class FundStatistic:
if key == None and code and name: if key == None and code and name:
key = str(code) + '-' + str(name) key = str(code) + '-' + str(name)
#key = str(name) #key = str(name)
holder_asset = round(portion * totol_asset / 100, 4) if totol_asset and portion else 0 holder_asset = round(
portion * totol_asset / 100, 4) if totol_asset and portion else 0
if(key in code_dict and code != None): if(key in code_dict and code != None):
code_dict[key]['count'] = code_dict[key]['count'] + 1 code_dict[key]['count'] = code_dict[key]['count'] + 1
code_dict[key]['fund_list'].append({ code_dict[key]['fund_list'].append({
@ -136,23 +139,28 @@ class FundStatistic:
code_dict[key] = { code_dict[key] = {
'count': 1, 'count': 1,
'fund_list': [{ 'fund_list': [{
**fund_info, **fund_info,
'仓位占比': portion, '仓位占比': portion,
'持有市值(亿元)': holder_asset, '持有市值(亿元)': holder_asset,
'仓位排名': int(index / 3) '仓位排名': int(index / 3)
}] }]
} }
#for key, value in code_dict.items(): # for key, value in code_dict.items():
# print('key, value', key, value) # print('key, value', key, value)
print('code_dict.items()', code_dict.items()) # print('code_dict.items()', code_dict.items())
return list(code_dict.items()) return list(code_dict.items())
#return sorted(code_dict.items(), key=lambda x: x[1]['count'], reverse=True) # return sorted(code_dict.items(), key=lambda x: x[1]['count'], reverse=True)
# 分组查询特定股票的每个季度基金持有总数 # 分组查询特定股票的每个季度基金持有总数
def item_stock_fund_count(self, stock_code, fund_code_pool=None): def item_stock_fund_count(self, stock_code, fund_code_pool=None):
return self.each_query.select_special_stock_fund_count(stock_code, fund_code_pool) return self.each_query.select_special_stock_fund_count(stock_code, fund_code_pool)
def select_special_stock_special_quarter_info(self, stock_code, quarter_index=None,fund_code_pool=None): def select_special_stock_special_quarter_info(self, stock_code, quarter_index=None, fund_code_pool=None):
result = self.each_query.select_special_stock_special_quarter_info(stock_code, quarter_index, fund_code_pool) """
即将废弃
"""
result = self.each_query.select_special_stock_special_quarter_info(
stock_code, quarter_index, fund_code_pool)
target_stock_dict = { target_stock_dict = {
'count': len(result) 'count': len(result)
} }
@ -163,13 +171,13 @@ class FundStatistic:
code = holders[index] code = holders[index]
if code == stock_code: if code == stock_code:
portion = holders[index+1] portion = holders[index+1]
holder_asset = round(portion * total_asset / 100, 4) if total_asset and portion else 0 holder_asset = round(
portion * total_asset / 100, 4) if total_asset and portion else 0
total_holder_asset = total_holder_asset + holder_asset total_holder_asset = total_holder_asset + holder_asset
break break
target_stock_dict['holder_asset'] = total_holder_asset target_stock_dict['holder_asset'] = total_holder_asset
return target_stock_dict return target_stock_dict
def select_fund_pool(self, *, morning_star_rating_5="", morning_star_rating_3="", **args): def select_fund_pool(self, *, morning_star_rating_5="", morning_star_rating_3="", **args):
return self.each_query.select_certain_condition_funds( return self.each_query.select_certain_condition_funds(
morning_star_rating_5=morning_star_rating_5, morning_star_rating_5=morning_star_rating_5,
@ -186,7 +194,7 @@ class FundStatistic:
def summary_special_funds_stock_detail(self, fund_code_pool, quarter_index=None): def summary_special_funds_stock_detail(self, fund_code_pool, quarter_index=None):
holder_stock_industry_list = [] holder_stock_industry_list = []
for fund_code in fund_code_pool: for fund_code in fund_code_pool:
fund_info = self.select_special_fund_info(fund_code, quarter_index ) fund_info = self.select_special_fund_info(fund_code, quarter_index)
fund_code = fund_info[0] fund_code = fund_info[0]
fund_name = fund_info[1] fund_name = fund_info[1]
fund_cat = fund_info[2] fund_cat = fund_info[2]
@ -199,17 +207,22 @@ class FundStatistic:
stock_name = fund_info[index+1] stock_name = fund_info[index+1]
stock_portion = fund_info[index+2] stock_portion = fund_info[index+2]
stock_index = int((index - 4) / 3) stock_index = int((index - 4) / 3)
stock_list_industry = [fund_code, fund_name,fund_cat,fund_manager, fund_total_asset, fund_total_portion, fund_ten_portion, stock_list_industry = [fund_code, fund_name, fund_cat, fund_manager, fund_total_asset, fund_total_portion, fund_ten_portion,
stock_code, stock_name, stock_portion, stock_index] stock_code, stock_name, stock_portion, stock_index]
#holder_stock_industry_list.append(stock_list_industry] # holder_stock_industry_list.append(stock_list_industry]
if bool(re.search("^\d{6}$", stock_code)): if bool(re.search("^\d{6}$", stock_code)):
stock_list_industry_list = self.select_stock_pool_industry([stock_code]) stock_list_industry_list = self.select_stock_pool_industry([
stock_code])
stock_list_industry_dict = stock_list_industry_list[0] stock_list_industry_dict = stock_list_industry_list[0]
industry_name_first = stock_list_industry_dict.get('industry_name_first') industry_name_first = stock_list_industry_dict.get(
industry_name_second = stock_list_industry_dict.get('industry_name_second') 'industry_name_first')
industry_name_third = stock_list_industry_dict.get('industry_name_third') industry_name_second = stock_list_industry_dict.get(
'industry_name_second')
holder_stock_industry_list.append([*stock_list_industry, industry_name_third,industry_name_second, industry_name_first]) industry_name_third = stock_list_industry_dict.get(
'industry_name_third')
holder_stock_industry_list.append(
[*stock_list_industry, industry_name_third, industry_name_second, industry_name_first])
return holder_stock_industry_list return holder_stock_industry_list
def query_all_stock_industry_info(self): def query_all_stock_industry_info(self):

@ -11,12 +11,14 @@ Copyright (c) 2020 Camel Lu
''' '''
import time import time
import re import re
import decimal
import os
from pprint import pprint from pprint import pprint
import pandas as pd import pandas as pd
import numpy as np
from fund_info.statistic import FundStatistic from fund_info.statistic import FundStatistic
from utils.index import get_last_quarter_str, find_from_list_of_dict from utils.index import get_last_quarter_str, get_stock_market, find_from_list_of_dict, update_xlsx_file
from openpyxl import load_workbook
import os
def get_fund_code_pool(): def get_fund_code_pool():
# fund_code_pool = ['000001', '160133', '360014', '420002', # fund_code_pool = ['000001', '160133', '360014', '420002',
@ -44,10 +46,24 @@ def get_fund_code_pool():
) )
return fund_code_pool return fund_code_pool
def stocks_compare(stock_list, *, quarter_index=None, fund_code_pool=None, is_A_stock=None):
each_statistic = FundStatistic() def stocks_compare(stock_list, *, market=None, quarter_index=None, fund_code_pool=None, is_A_stock=None):
"""与某个季度数据进行比较
"""
if quarter_index == None: if quarter_index == None:
quarter_index = get_last_quarter_str(2) quarter_index = get_last_quarter_str(2)
print("quarter_index", quarter_index)
last_quarter_input_file = './outcome/数据整理/strategy/all_stock_rank/' + \
quarter_index + '.xlsx'
data_last_quarter = pd.read_excel(io=last_quarter_input_file, engine="openpyxl", dtype={
"代码": np.str}, sheet_name=None)
if market:
df_data_target_market = data_last_quarter.get(market)
df_data_target_market[quarter_index + '持有数量(只)'] = df_data_target_market[quarter_index + '持有数量(只)'].astype(
int)
each_statistic = FundStatistic()
filter_list = [] filter_list = []
for stock in stock_list: for stock in stock_list:
@ -58,22 +74,25 @@ def stocks_compare(stock_list, *, quarter_index=None, fund_code_pool=None, is_A_
stock_holder_detail = stock[1] stock_holder_detail = stock[1]
holder_count = stock_holder_detail.get('count') holder_count = stock_holder_detail.get('count')
holder_asset = stock_holder_detail.get('holder_asset') holder_asset = stock_holder_detail.get('holder_asset')
if not market:
last_quarter_holder_detail_dict = each_statistic.select_special_stock_special_quarter_info( target_market = get_stock_market(stock_code)
stock_code, print("target_market", target_market)
quarter_index, df_data_target_market = data_last_quarter.get(target_market)
fund_code_pool target_loc = df_data_target_market[df_data_target_market['代码'] == stock_code]
) last_holder_count = 0
last_holder_asset = 0
last_holder_count = last_quarter_holder_detail_dict['count'] if len(target_loc) == 1:
last_holder_asset = last_quarter_holder_detail_dict['holder_asset'] col_target = quarter_index + '持有数量(只)'
last_holder_count = target_loc[col_target].iloc[0]
col_target = quarter_index + '持有市值(亿元)'
last_holder_asset = round(decimal.Decimal(
target_loc[col_target].iloc[0]), 4)
diff_holder_count = holder_count - last_holder_count diff_holder_count = holder_count - last_holder_count
diff_holder_asset = holder_asset - last_holder_asset diff_holder_asset = holder_asset - last_holder_asset
diff_holder_count_percent = '{:.2%}'.format( diff_holder_count_percent = '{:.2%}'.format(
diff_holder_count / last_holder_count) if last_holder_count != 0 else "+∞" diff_holder_count / last_holder_count) if last_holder_count != 0 else "+∞"
diff_holder_asset_percent = '{:.2%}'.format( diff_holder_asset_percent = '{:.2%}'.format(
diff_holder_asset / last_holder_asset) if last_holder_asset != 0 else "+∞" diff_holder_asset / last_holder_asset) if last_holder_asset != 0 else "+∞"
# flag = '📈' if diff_holder_count > 0 else '📉' # flag = '📈' if diff_holder_count > 0 else '📉'
@ -85,14 +104,18 @@ def stocks_compare(stock_list, *, quarter_index=None, fund_code_pool=None, is_A_
flag_asset = 'up' if diff_holder_asset > 0 else 'down' flag_asset = 'up' if diff_holder_asset > 0 else 'down'
if diff_holder_asset == 0: if diff_holder_asset == 0:
flag = '=' flag = '='
item_tuple = [stock_code, stock_name, holder_count, last_holder_count, item_tuple = [stock_code, stock_name, holder_count, last_holder_count,
diff_holder_count, diff_holder_count_percent, flag_count, holder_asset, last_holder_asset, diff_holder_asset, diff_holder_asset_percent, flag_asset] diff_holder_count, diff_holder_count_percent, flag_count, holder_asset, last_holder_asset, diff_holder_asset, diff_holder_asset_percent, flag_asset]
if is_A_stock: if is_A_stock:
industry_name_third = stock_holder_detail.get('industry_name_third') industry_name_third = stock_holder_detail.get(
industry_name_second = stock_holder_detail.get('industry_name_second') 'industry_name_third')
industry_name_first = stock_holder_detail.get('industry_name_first') industry_name_second = stock_holder_detail.get(
item_tuple = [*item_tuple, industry_name_third, industry_name_second,industry_name_first ] 'industry_name_second')
industry_name_first = stock_holder_detail.get(
'industry_name_first')
item_tuple = [*item_tuple, industry_name_third,
industry_name_second, industry_name_first]
# if diff_percent == "+∞" or not float(diff_percent.rstrip('%')) < -20: # if diff_percent == "+∞" or not float(diff_percent.rstrip('%')) < -20:
filter_list.append(item_tuple) filter_list.append(item_tuple)
@ -100,34 +123,38 @@ def stocks_compare(stock_list, *, quarter_index=None, fund_code_pool=None, is_A_
return filter_list return filter_list
# T100权重股排名 # T100权重股排名
def t100_stocks_rank(quarter_index=None, *, each_statistic): def t100_stocks_rank(quarter_index=None, *, each_statistic):
if quarter_index == None: if quarter_index == None:
quarter_index = get_last_quarter_str() quarter_index = get_last_quarter_str(1)
last_quarter_index = get_last_quarter_str(2) last_quarter_index = get_last_quarter_str(2)
output_file = './outcome/数据整理/strategy/top100_rank.xlsx' output_file = './outcome/数据整理/strategy/top100_rank.xlsx'
sheet_name = quarter_index + '基金重仓股T100' sheet_name = quarter_index + '基金重仓股T100'
columns=['代码', columns = ['代码',
'名称', quarter_index + '持有数量(只)', last_quarter_index +'持有数量(只)', '持有数量环比', '持有数量环比百分比', '持有数量升或降', quarter_index + '持有市值(亿元)', last_quarter_index + '持有市值(亿元)', '持有市值环比', '持有市值环比百分比', '持有市值升或降'] '名称', quarter_index + '持有数量(只)', last_quarter_index + '持有数量(只)', '持有数量环比', '持有数量环比百分比', '持有数量升或降', quarter_index + '持有市值(亿元)', last_quarter_index + '持有市值(亿元)', '持有市值环比', '持有市值环比百分比', '持有市值升或降']
stock_top_list = each_statistic.all_stock_fund_count( stock_top_list = each_statistic.all_stock_fund_count(
quarter_index=quarter_index, quarter_index=quarter_index,
filter_count=80) filter_count=80)
stock_top_list = stock_top_list[:100] # 获取top100权重股 stock_top_list = stock_top_list[:100] # 获取top100权重股
#pprint(stock_top_list) # pprint(stock_top_list)
filter_list = stocks_compare(stock_top_list) filter_list = stocks_compare(stock_top_list)
df_filter_list = pd.DataFrame(filter_list, columns=columns) df_filter_list = pd.DataFrame(filter_list, columns=columns)
df_filter_list.to_excel(output_file, sheet_name=sheet_name) update_xlsx_file(output_file, df_filter_list, sheet_name)
# df_filter_list.to_excel(output_file, sheet_name=sheet_name)
# 所有股票排名
def all_stocks_rank(each_statistic): def all_stocks_rank(each_statistic):
quarter_index = get_last_quarter_str(2) """所有股票排名
"""
quarter_index = get_last_quarter_str(1)
print("quarter_index", quarter_index) print("quarter_index", quarter_index)
last_quarter_index = get_last_quarter_str(3) last_quarter_index = get_last_quarter_str(2)
sheet_name = last_quarter_index + '基金重仓股T100' sheet_name = last_quarter_index + '基金重仓股T100'
columns=['代码', columns = ['代码',
'名称', quarter_index + '持有数量(只)', last_quarter_index +'持有数量(只)', '持有数量环比', '持有数量环比百分比', '持有数量升或降', quarter_index + '持有市值(亿元)', last_quarter_index + '持有市值(亿元)', '持有市值环比', '持有市值环比百分比', '持有市值升或降'] '名称', quarter_index + '持有数量(只)', last_quarter_index + '持有数量(只)', '持有数量环比', '持有数量环比百分比', '持有数量升或降', quarter_index + '持有市值(亿元)', last_quarter_index + '持有市值(亿元)', '持有市值环比', '持有市值环比百分比', '持有市值升或降']
output_file = './outcome/数据整理/strategy/all_stock_rank/'+ quarter_index +'.xlsx' output_file = './outcome/数据整理/strategy/all_stock_rank/' + quarter_index + '.xlsx'
stock_top_list = each_statistic.all_stock_fund_count( stock_top_list = each_statistic.all_stock_fund_count(
quarter_index=quarter_index, quarter_index=quarter_index,
@ -145,38 +172,60 @@ def all_stocks_rank(each_statistic):
#path = '港股' #path = '港股'
hk_stock_list.append(stock_name_code) hk_stock_list.append(stock_name_code)
elif bool(re.search("^\d{6}$", stock_code)): elif bool(re.search("^\d{6}$", stock_code)):
#'A股/深证主板'、'A股/创业板'、'A股/上证主板'、'A股/科创板' # 'A股/深证主板'、'A股/创业板'、'A股/上证主板'、'A股/科创板'
a_condition = bool(re.search("^(00(0|1|2|3)\d{3})|(30(0|1)\d{3})|(60(0|1|2|3|5)\d{3})|68(8|9)\d{3}$", stock_code)) a_condition = bool(re.search(
target_item = find_from_list_of_dict(all_a_stocks_industry_info_list, 'stock_code', stock_code) "^(00(0|1|2|3)\d{3})|(30(0|1)\d{3})|(60(0|1|2|3|5)\d{3})|68(8|9)\d{3}$", stock_code))
target_item = find_from_list_of_dict(
all_a_stocks_industry_info_list, 'stock_code', stock_code)
if a_condition and target_item: if a_condition and target_item:
print('stock_code',stock_code) stock_name_code[1]['industry_name_first'] = target_item.get(
stock_name_code[1]['industry_name_first'] = target_item.get('industry_name_first') 'industry_name_first')
stock_name_code[1]['industry_name_second'] = target_item.get('industry_name_second') stock_name_code[1]['industry_name_second'] = target_item.get(
stock_name_code[1]['industry_name_third'] = target_item.get('industry_name_third') 'industry_name_second')
stock_name_code[1]['industry_name_third'] = target_item.get(
'industry_name_third')
a_stock_list.append(stock_name_code) a_stock_list.append(stock_name_code)
else: else:
other_stock_list.append(stock_name_code) other_stock_list.append(stock_name_code)
else: else:
other_stock_list.append(stock_name_code) other_stock_list.append(stock_name_code)
a_stock_compare_list = stocks_compare(a_stock_list, quarter_index=last_quarter_index, is_A_stock=True) a_market = 'A股'
hk_stock_compare_list = stocks_compare(hk_stock_list,quarter_index=last_quarter_index,) hk_market = '港股'
other_stock_compare_list = stocks_compare(other_stock_list,quarter_index=last_quarter_index,) other_market = '其他'
a_stock_compare_list = stocks_compare(
a_stock_list, market=a_market, quarter_index=last_quarter_index, is_A_stock=True)
hk_stock_compare_list = stocks_compare(
hk_stock_list, market=hk_market, quarter_index=last_quarter_index,)
other_stock_compare_list = stocks_compare(
other_stock_list, market=other_market, quarter_index=last_quarter_index,)
a_columns = [*columns, '三级行业', '二级行业', '一级行业'] a_columns = [*columns, '三级行业', '二级行业', '一级行业']
df_a_list = pd.DataFrame(a_stock_compare_list, columns=a_columns) df_a_list = pd.DataFrame(a_stock_compare_list, columns=a_columns)
df_hk_list = pd.DataFrame(hk_stock_compare_list, columns=columns) df_hk_list = pd.DataFrame(hk_stock_compare_list, columns=columns)
df_other_list = pd.DataFrame(other_stock_compare_list, columns=columns) df_other_list = pd.DataFrame(other_stock_compare_list, columns=columns)
writer = pd.ExcelWriter(output_file, engine='xlsxwriter') writer = pd.ExcelWriter(output_file, engine='xlsxwriter')
df_a_list.to_excel(writer, sheet_name='A股') df_a_list.to_excel(writer, sheet_name=a_market)
df_hk_list.to_excel(writer, sheet_name='港股') df_hk_list.to_excel(writer, sheet_name=hk_market)
df_other_list.to_excel(writer, sheet_name='其他') df_other_list.to_excel(writer, sheet_name=other_market)
writer.save() writer.save()
def all_stock_holder_detail(quarter_index, each_statistic, threshold=0):
def all_stock_holder_detail(each_statistic, *, quarter_index=None, threshold=0):
""" 所有股票的基金持仓细节
Args:
each_statistic (class): 统计类
quarter_index (str, optional): 季度字符串. Defaults to None.
threshold (int, optional): 输出门槛. Defaults to 0.
"""
if quarter_index == None:
quarter_index = get_last_quarter_str()
stock_list = each_statistic.all_stock_fund_count_and_details( stock_list = each_statistic.all_stock_fund_count_and_details(
quarter_index=quarter_index, quarter_index=quarter_index,
filter_count=threshold) filter_count=threshold)
@ -199,84 +248,75 @@ def all_stock_holder_detail(quarter_index, each_statistic, threshold=0):
else: else:
print('stock_name_code', stock_name_code) print('stock_name_code', stock_name_code)
hold_fund_count = stock[1]['count'] hold_fund_count = stock[1]['count']
hold_fund_list = sorted(stock[1]['fund_list'], key=lambda x: x['持有市值(亿元)'], reverse=True) hold_fund_list = sorted(
stock[1]['fund_list'], key=lambda x: x['持有市值(亿元)'], reverse=True)
df_list = pd.DataFrame(hold_fund_list) df_list = pd.DataFrame(hold_fund_list)
#if stock_code == 'NTES': # if stock_code == 'NTES':
# print('stock_code', df_list) # print('stock_code', df_list)
stock_name_code = stock_name_code.replace('-*', '-').replace('/', '-') stock_name_code = stock_name_code.replace('-*', '-').replace('/', '-')
path = './outcome/数据整理/stocks/' + path + '/' + stock_name_code + '.xlsx' path = './outcome/数据整理/stocks/' + path + '/' + stock_name_code + '.xlsx'
path = path.replace('\/', '-') path = path.replace('\/', '-')
print("path", path) print("path", path)
#print('df_list--',stock_name_code, df_list)
if os.path.exists(path): update_xlsx_file(path, df_list, quarter_index)
writer = pd.ExcelWriter(path, engine='openpyxl')
book = load_workbook(path)
# 表名重复,删掉,重写
if quarter_index in book.sheetnames:
del book[quarter_index]
if len(book.sheetnames) == 0:
df_list.to_excel(
path, sheet_name=quarter_index)
continue
else:
writer.book = book
df_list.to_excel(
writer, sheet_name=quarter_index)
writer.save()
writer.close()
else:
df_list.to_excel(
path, sheet_name=quarter_index)
# 获取某些基金的十大持仓股票信息
def get_special_fund_code_holder_stock_detail(quarter_index, each_statistic): def get_special_fund_code_holder_stock_detail(quarter_index, each_statistic):
#基金组合信息 """ 获取某些基金的十大持仓股票信息
fund_portfolio ={ """
# 基金组合信息
fund_portfolio = {
'001811': { '001811': {
'name': '中欧明睿新常态混合A', 'name': '中欧明睿新常态混合A',
'position' : 0.2 'position': 0.2
}, },
'001705': { '001705': {
'name': '泓德战略转型股票', 'name': '泓德战略转型股票',
'position' : 0.2 'position': 0.2
}, },
'163415': { '163415': {
'name': '兴全商业模式优选混合', 'name': '兴全商业模式优选混合',
'position' : 0.2 'position': 0.2
}, },
'001043': { '001043': {
'name': '工银美丽城镇主题股票A', 'name': '工银美丽城镇主题股票A',
'position' : 0.2 'position': 0.2
}, },
'000547': { '000547': {
'name': '建信健康民生混合', 'name': '建信健康民生混合',
'position' : 0.2 'position': 0.2
}, },
'450001': { '450001': {
'name': '国富中国收益混合', 'name': '国富中国收益混合',
'position' : 0.2 'position': 0.2
}, },
} }
fund_code_pool = list(fund_portfolio.keys()) fund_code_pool = list(fund_portfolio.keys())
holder_stock_industry_list = each_statistic.summary_special_funds_stock_detail(fund_code_pool, quarter_index) holder_stock_industry_list = each_statistic.summary_special_funds_stock_detail(
fund_code_pool, quarter_index)
path = './outcome/数据整理/funds/' + '/' + '高分权益基金组合十大持仓明细' + '.xlsx' path = './outcome/数据整理/funds/' + '/' + '高分权益基金组合十大持仓明细' + '.xlsx'
columns=['基金代码','基金名称', '基金类型', '基金经理', '基金总资产(亿元)', '基金股票总仓位', '十大股票仓位', '股票代码', '股票名称', '所占仓位', '所处仓位排名', '三级行业', '二级行业', '一级行业'] columns = ['基金代码', '基金名称', '基金类型', '基金经理', '基金总资产(亿元)', '基金股票总仓位',
'十大股票仓位', '股票代码', '股票名称', '所占仓位', '所处仓位排名', '三级行业', '二级行业', '一级行业']
df_a_list = pd.DataFrame(holder_stock_industry_list, columns=columns) df_a_list = pd.DataFrame(holder_stock_industry_list, columns=columns)
writer = pd.ExcelWriter(path, engine='xlsxwriter') writer = pd.ExcelWriter(path, engine='xlsxwriter')
df_a_list.to_excel(writer, sheet_name='十大持仓明细--' + quarter_index) df_a_list.to_excel(writer, sheet_name='十大持仓明细--' + quarter_index)
writer.save() writer.save()
if __name__ == '__main__': if __name__ == '__main__':
each_statistic = FundStatistic() each_statistic = FundStatistic()
quarter_index = "2021-Q1" # quarter_index = "2021-Q2"
# read_excel(path, 'A股', '601888', '2021-Q1持有市值亿元')
# 所有股票的基金持仓细节 # 所有股票的基金持仓细节
#all_stock_holder_detail(quarter_index, each_statistic) # all_stock_holder_detail(each_statistic)
# 获取所有股票排名,按股票市场分类输出 # 获取所有股票排名,按股票市场分类输出
all_stocks_rank(each_statistic) # all_stocks_rank(each_statistic)
# 获取Top100股票排名 # 获取Top100股票排名
#t100_stocks_rank(each_statistic=each_statistic) t100_stocks_rank(each_statistic=each_statistic)
# 获取某些基金的十大持仓股票信息 # 获取某些基金的十大持仓股票信息
#get_special_fund_code_holder_stock_detail(quarter_index, each_statistic) #get_special_fund_code_holder_stock_detail(quarter_index, each_statistic)

@ -7,17 +7,23 @@ Author: luxuemin2108@gmail.com
----- -----
Copyright (c) 2021 Camel Lu Copyright (c) 2021 Camel Lu
''' '''
import os
from sql_model.fund_query import FundQuery from sql_model.fund_query import FundQuery
import pandas as pd import pandas as pd
from openpyxl import load_workbook,Workbook from openpyxl import load_workbook, Workbook
from openpyxl.utils import get_column_letter from openpyxl.utils import get_column_letter
from utils.index import get_last_quarter_str from utils.index import get_last_quarter_str
from pprint import pprint from pprint import pprint
# 输出高分基金
def output_high_score_funds(each_query,quarter_index=None): def output_high_score_funds(each_query, quarter_index=None):
"""
输出高分基金
"""
if quarter_index == None: if quarter_index == None:
quarter_index = get_last_quarter_str() quarter_index = get_last_quarter_str()
print("quarter_index", quarter_index)
high_score_funds = each_query.select_high_score_funds( high_score_funds = each_query.select_high_score_funds(
quarter_index=quarter_index) quarter_index=quarter_index)
columns_bk = ['代码', '名称', '季度', '总资产', '现任基金经理管理起始时间', '投资风格', '三月最大回撤', '六月最大回撤', '夏普比率', '阿尔法系数', '贝塔系数', columns_bk = ['代码', '名称', '季度', '总资产', '现任基金经理管理起始时间', '投资风格', '三月最大回撤', '六月最大回撤', '夏普比率', '阿尔法系数', '贝塔系数',
@ -34,27 +40,28 @@ def output_high_score_funds(each_query,quarter_index=None):
# df_high_score_funds.to_excel(writer, sheet_name=quarter_index) # df_high_score_funds.to_excel(writer, sheet_name=quarter_index)
# df2.to_excel(writer, sheet_name='Sheet2') # df2.to_excel(writer, sheet_name='Sheet2')
path = './output/xlsx/high-score-funds_log.xlsx' path = './output/xlsx/high-score-funds_log.xlsx'
writer = pd.ExcelWriter(path, engine='openpyxl') if os.path.exists(path):
book = load_workbook(path) writer = pd.ExcelWriter(path, engine='openpyxl')
writer.book = book book = load_workbook(path)
df_high_score_funds.to_excel(writer, sheet_name=quarter_index) writer.book = book
writer.save() # 表名重复,删掉,重写
writer.close() if quarter_index in book.sheetnames:
del book[quarter_index]
if __name__ == '__main__': if len(book.sheetnames) == 0:
#each_query = FundQuery() df_high_score_funds.to_excel(
#quarter_index = '2020-Q4' path, sheet_name=quarter_index)
#output_high_score_funds() else:
dest_filename = 'empty_book.xlsx' writer.book = book
#wb = Workbook(dest_filename) df_high_score_funds.to_excel(
wb = load_workbook(filename = 'empty_book.xlsx') writer, sheet_name=quarter_index)
ws = wb.active writer.save()
print("ws", ws) writer.close()
#ws.merge_cells('A2:D2') else:
ws.merge_cells(start_row=2, start_column=1, end_row=4, end_column=4) df_high_score_funds.to_excel(
ws.merge_cells('J17:J20') path, sheet_name=quarter_index)
ws.column_dimensions.group('A','D', hidden=True)
ws.row_dimensions.group(1,10, hidden=True)
wb.save(dest_filename)
#ws.unmerge_cells('A2:D2')
if __name__ == '__main__':
each_query = FundQuery()
output_high_score_funds(each_query)

@ -111,7 +111,6 @@ class FundQuery(BaseQuery):
sql, [self.quarter_date, self.quarter_index, page_start, page_limit]) # 执行sql语句 sql, [self.quarter_date, self.quarter_index, page_start, page_limit]) # 执行sql语句
return self.cursor.fetchall() # 获取查询的所有记录 return self.cursor.fetchall() # 获取查询的所有记录
@lock_process
def select_high_score_funds(self, *, quarter_index=None): def select_high_score_funds(self, *, quarter_index=None):
"""获取高分基金池 """获取高分基金池
@ -262,8 +261,8 @@ class FundQuery(BaseQuery):
return () return ()
list_str = ', '.join(fund_code_pool) list_str = ', '.join(fund_code_pool)
fund_code_list_sql = "AND t.fund_code IN (" + list_str + ")" fund_code_list_sql = "AND t.fund_code IN (" + list_str + ")"
sql_query_quarter = "SELECT t.fund_code, t.fund_name, u.total_asset, t.stock_position_total, " + stock_sql_join + \ sql_query_quarter = "SELECT t.fund_code, a.fund_name, u.total_asset, t.stock_position_total, " + stock_sql_join + \
" FROM fund_morning_stock_info as t LEFT JOIN fund_morning_quarter as u ON u.fund_code = t.fund_code AND u.quarter_index = t.quarter_index WHERE u.quarter_index = %s AND t.stock_position_total > 20 " + \ " FROM fund_morning_stock_info as t LEFT JOIN fund_morning_quarter as u ON u.fund_code = t.fund_code AND u.quarter_index = t.quarter_index LEFT JOIN fund_morning_base as a ON a.fund_code = t.fund_code WHERE u.quarter_index = %s AND t.stock_position_total > 20 " + \
fund_code_list_sql + \ fund_code_list_sql + \
";" # 大于20%股票持仓基金 ";" # 大于20%股票持仓基金
if quarter_index == None: if quarter_index == None:
@ -299,6 +298,9 @@ class FundQuery(BaseQuery):
# #
def select_special_stock_special_quarter_info(self, stock_code, quarter_index=None, fund_code_pool=None): def select_special_stock_special_quarter_info(self, stock_code, quarter_index=None, fund_code_pool=None):
"""
即将废弃
"""
if quarter_index == None: if quarter_index == None:
quarter_index = self.quarter_index quarter_index = self.quarter_index

@ -2,8 +2,13 @@
import time import time
import datetime import datetime
import os import os
import re
from threading import Thread, Lock from threading import Thread, Lock
import pandas as pd
from openpyxl import load_workbook
def lock_process(func): def lock_process(func):
lock = Lock() lock = Lock()
@ -125,6 +130,38 @@ def find_from_list_of_dict(dict_list, match_key, value):
return res return res
def get_stock_market(stock_code):
    """Classify a stock code into the market bucket used for the rank sheets.

    Args:
        stock_code (str): Stock code as it appears in the holdings data.

    Returns:
        str: '港股' when the code is exactly five digits; 'A股' when it is
        exactly six digits and starts with one of the prefixes 000-003,
        300-301, 600-603, 605 or 688-689; '其他' for everything else.
    """
    # re.fullmatch anchors the whole pattern. The previous alternation had
    # "^" bound only to its first branch and "$" only to its last, and was
    # correct only thanks to a separate six-digit guard. Raw strings avoid
    # the invalid "\d" escape in a normal string literal.
    if re.fullmatch(r"\d{5}", stock_code):
        return '港股'
    if re.fullmatch(r"(?:00[0-3]|30[01]|60[01235]|68[89])\d{3}", stock_code):
        return 'A股'
    return '其他'
def update_xlsx_file(path, df_data, sheet_name):
    """Write ``df_data`` into sheet ``sheet_name`` of the workbook at ``path``.

    If the file does not exist it is created with that single sheet. If it
    exists, any sheet already named ``sheet_name`` is dropped and the data is
    appended as a new sheet, leaving the other sheets in place.

    Args:
        path (str): Location of the .xlsx file.
        df_data (pandas.DataFrame): Data to write.
        sheet_name (str): Name of the sheet to create or replace.
    """
    if not os.path.exists(path):
        df_data.to_excel(path, sheet_name=sheet_name)
        return

    # Read and edit the existing workbook *before* any pandas writer touches
    # the path: a write-mode ExcelWriter may truncate the file on creation.
    book = load_workbook(path)
    try:
        sheet_removed = sheet_name in book.sheetnames
        if sheet_removed:
            # Duplicate sheet name: drop it so it can be rewritten.
            del book[sheet_name]
        remaining_sheets = list(book.sheetnames)
        if sheet_removed and remaining_sheets:
            book.save(path)
    finally:
        book.close()

    if not remaining_sheets:
        # Nothing left to preserve (a workbook cannot be saved without
        # sheets), so simply overwrite the file.
        df_data.to_excel(path, sheet_name=sheet_name)
        return

    # Append mode keeps the remaining sheets; the context manager guarantees
    # the writer is saved and closed exactly once.
    with pd.ExcelWriter(path, engine='openpyxl', mode='a') as writer:
        df_data.to_excel(writer, sheet_name=sheet_name)
def bootstrap_thread(target_fn, total, thread_count=2): def bootstrap_thread(target_fn, total, thread_count=2):
threaders = [] threaders = []
start_time = time.time() start_time = time.time()

Loading…
Cancel
Save