diff --git a/output/fund_morning_quarter_catch.csv b/output/fund_morning_quarter_catch.csv new file mode 100644 index 0000000..73331b6 --- /dev/null +++ b/output/fund_morning_quarter_catch.csv @@ -0,0 +1 @@ +代码,晨星专属号,名称,类型,股票总仓位,页码,备注 diff --git a/output/xlsx/high-score-funds_log.xlsx b/output/xlsx/high-score-funds_log.xlsx index f7ca5eb..176c8a6 100644 Binary files a/output/xlsx/high-score-funds_log.xlsx and b/output/xlsx/high-score-funds_log.xlsx differ diff --git a/requirements.txt b/requirements.txt index f22fb5a..10eb3d2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,60 +1,10 @@ -absl-py==0.11.0 -astor==0.8.1 -autopep8==1.5.4 -beautifulsoup4==4.5.3 -bs4==0.0.1 -cached-property==1.5.2 -certifi==2020.12.5 -chardet==3.0.4 -cycler==0.10.0 -et-xmlfile==1.1.0 -fake-useragent==0.1.11 -gast==0.4.0 -google-pasta==0.2.0 -grpcio==1.34.0 -h5py==3.1.0 -idna==2.6 -importlib-metadata==3.3.0 -joblib==1.0.0 -Keras==2.2.4 -Keras-Applications==1.0.8 -Keras-Preprocessing==1.1.2 -kiwisolver==1.3.1 -lxml==4.6.2 -Markdown==3.3.3 -matplotlib==3.3.3 -ntplib==0.3.4 -numpy==1.19.5 -opencv-python==4.5.1.48 -openpyxl==3.0.7 -pandas==1.1.5 -Pillow==8.1.0 -protobuf==3.14.0 -pycodestyle==2.6.0 +selenium==3.11.0 PyMySQL==1.0.2 -pyparsing==2.4.7 -pysnowflake==0.1.3 -pytesseract==0.3.7 -python-dateutil==2.8.1 -python-dotenv==0.17.0 -pytz==2020.5 -PyYAML==5.3.1 +pandas==1.1.5 requests==2.18.4 -scikit-learn==0.24.0 -scipy==1.6.0 -selenium==3.11.0 -six==1.15.0 -sklearn==0.0 -tensorboard==1.14.0 -tensorflow==1.14.0 -tensorflow-estimator==1.14.0 -termcolor==1.1.0 -threadpoolctl==2.1.0 -toml==0.10.2 -tornado==6.1 -typing-extensions==3.7.4.3 -urllib3==1.22 -Werkzeug==1.0.1 -wrapcache==1.0.8 -wrapt==1.12.1 -zipp==3.4.0 +pytesseract==0.3.7 +numpy==1.19.5 +openpyxl==3.0.7 +beautifulsoup4==4.9.3 +Pillow==8.3.1 +python-dotenv==0.19.0 diff --git a/src/fund_info/statistic.py b/src/fund_info/statistic.py index 6fd8571..be1d0d5 100644 --- a/src/fund_info/statistic.py +++ b/src/fund_info/statistic.py @@ -7,6 +7,9 @@ Author: luxuemin2108@gmail.com ----- Copyright (c) 2021 Camel Lu ''' +from sql_model.stock_query import StockQuery +from sql_model.fund_query import FundQuery +from utils.index import get_quarter_index, fisrt_match_condition_from_list import time import datetime import re @@ -15,9 +18,6 @@ import sys from pprint import pprint sys.path.append('../') sys.path.append(os.getcwd() + '/src') -from utils.index import get_quarter_index, fisrt_match_condition_from_list -from sql_model.fund_query import FundQuery -from sql_model.stock_query import StockQuery class FundStatistic: @@ -53,7 +53,7 @@ class FundStatistic: code_dict = dict() for result in results: # print(result) - totol_asset = result[2] + totol_asset = result[2] for index in range(4, len(result), 3): code = result[index] name = result[index + 1] # 仅以股票名称为key,兼容港股,A股 @@ -63,12 +63,14 @@ class FundStatistic: #print('基金名称', result[1],'基金代码', result[0]) continue key = fisrt_match_condition_from_list(list(code_dict), code) - holder_asset = round(portion * totol_asset / 100, 4) if totol_asset and portion else 0 + holder_asset = round( + portion * totol_asset / 100, 4) if totol_asset and portion else 0 if key == None and code and name: key = str(code) + '-' + str(name) if(key in code_dict and code != None): count = code_dict[key]['count'] + 1 - holder_asset = code_dict[key]['holder_asset'] + holder_asset + holder_asset = code_dict[key]['holder_asset'] + \ + holder_asset code_dict[key] = { 'count': count, 'holder_asset': holder_asset @@ -79,7 +81,7 @@ class FundStatistic: 'holder_asset': holder_asset } filer_dict = dict() - + for key, value in code_dict.items(): # for (key,value) in girl_dict.items() 这样加上括号也可以 if value['count'] > filter_count and key != None: filer_dict[key] = value @@ -105,14 +107,14 @@ class FundStatistic: ) code_dict = dict() for result in results: - #print(result) + # print(result) fund_info = { '基金代码': result[0], '基金名称': result[1], '基金金额': result[2], '股票总仓位': result[3], } - totol_asset = result[2] + totol_asset = result[2] for index in range(4, len(result), 3): code = result[index] name = result[index + 1] @@ -123,7 +125,8 @@ class FundStatistic: if key == None and code and name: key = str(code) + '-' + str(name) #key = str(name) - holder_asset = round(portion * totol_asset / 100, 4) if totol_asset and portion else 0 + holder_asset = round( + portion * totol_asset / 100, 4) if totol_asset and portion else 0 if(key in code_dict and code != None): code_dict[key]['count'] = code_dict[key]['count'] + 1 code_dict[key]['fund_list'].append({ @@ -136,23 +139,28 @@ class FundStatistic: code_dict[key] = { 'count': 1, 'fund_list': [{ - **fund_info, - '仓位占比': portion, - '持有市值(亿元)': holder_asset, - '仓位排名': int(index / 3) - }] + **fund_info, + '仓位占比': portion, + '持有市值(亿元)': holder_asset, + '仓位排名': int(index / 3) + }] } - #for key, value in code_dict.items(): + # for key, value in code_dict.items(): # print('key, value', key, value) - print('code_dict.items()', code_dict.items()) + # print('code_dict.items()', code_dict.items()) return list(code_dict.items()) - #return sorted(code_dict.items(), key=lambda x: x[1]['count'], reverse=True) + # return sorted(code_dict.items(), key=lambda x: x[1]['count'], reverse=True) # 分组查询特定股票的每个季度基金持有总数 + def item_stock_fund_count(self, stock_code, fund_code_pool=None): return self.each_query.select_special_stock_fund_count(stock_code, fund_code_pool) - def select_special_stock_special_quarter_info(self, stock_code, quarter_index=None,fund_code_pool=None): - result = self.each_query.select_special_stock_special_quarter_info(stock_code, quarter_index, fund_code_pool) + def select_special_stock_special_quarter_info(self, stock_code, quarter_index=None, fund_code_pool=None): + """ + 即将废弃 + """ + result = self.each_query.select_special_stock_special_quarter_info( + stock_code, quarter_index, fund_code_pool) target_stock_dict = { 'count': len(result) } @@ -163,13 +171,13 @@ class FundStatistic: code = holders[index] if code == stock_code: portion = holders[index+1] - holder_asset = round(portion * total_asset / 100, 4) if total_asset and portion else 0 + holder_asset = round( + portion * total_asset / 100, 4) if total_asset and portion else 0 total_holder_asset = total_holder_asset + holder_asset break target_stock_dict['holder_asset'] = total_holder_asset return target_stock_dict - def select_fund_pool(self, *, morning_star_rating_5="", morning_star_rating_3="", **args): return self.each_query.select_certain_condition_funds( morning_star_rating_5=morning_star_rating_5, @@ -186,7 +194,7 @@ class FundStatistic: def summary_special_funds_stock_detail(self, fund_code_pool, quarter_index=None): holder_stock_industry_list = [] for fund_code in fund_code_pool: - fund_info = self.select_special_fund_info(fund_code, quarter_index ) + fund_info = self.select_special_fund_info(fund_code, quarter_index) fund_code = fund_info[0] fund_name = fund_info[1] fund_cat = fund_info[2] @@ -199,17 +207,22 @@ class FundStatistic: stock_name = fund_info[index+1] stock_portion = fund_info[index+2] stock_index = int((index - 4) / 3) - stock_list_industry = [fund_code, fund_name,fund_cat,fund_manager, fund_total_asset, fund_total_portion, fund_ten_portion, - stock_code, stock_name, stock_portion, stock_index] - #holder_stock_industry_list.append(stock_list_industry] + stock_list_industry = [fund_code, fund_name, fund_cat, fund_manager, fund_total_asset, fund_total_portion, fund_ten_portion, + stock_code, stock_name, stock_portion, stock_index] + # holder_stock_industry_list.append(stock_list_industry] if bool(re.search("^\d{6}$", stock_code)): - stock_list_industry_list = self.select_stock_pool_industry([stock_code]) + stock_list_industry_list = self.select_stock_pool_industry([ + stock_code]) stock_list_industry_dict = stock_list_industry_list[0] - industry_name_first = stock_list_industry_dict.get('industry_name_first') - industry_name_second = stock_list_industry_dict.get('industry_name_second') - industry_name_third = stock_list_industry_dict.get('industry_name_third') - - holder_stock_industry_list.append([*stock_list_industry, industry_name_third,industry_name_second, industry_name_first]) + industry_name_first = stock_list_industry_dict.get( + 'industry_name_first') + industry_name_second = stock_list_industry_dict.get( + 'industry_name_second') + industry_name_third = stock_list_industry_dict.get( + 'industry_name_third') + + holder_stock_industry_list.append( + [*stock_list_industry, industry_name_third, industry_name_second, industry_name_first]) return holder_stock_industry_list def query_all_stock_industry_info(self): diff --git a/src/fund_statistic.py b/src/fund_statistic.py index 2c7cf8e..3231440 100644 --- a/src/fund_statistic.py +++ b/src/fund_statistic.py @@ -11,12 +11,14 @@ Copyright (c) 2020 Camel Lu ''' import time import re +import decimal +import os from pprint import pprint import pandas as pd +import numpy as np from fund_info.statistic import FundStatistic -from utils.index import get_last_quarter_str, find_from_list_of_dict -from openpyxl import load_workbook -import os +from utils.index import get_last_quarter_str, get_stock_market, find_from_list_of_dict, update_xlsx_file + def get_fund_code_pool(): # fund_code_pool = ['000001', '160133', '360014', '420002', @@ -44,10 +46,24 @@ def get_fund_code_pool(): ) return fund_code_pool -def stocks_compare(stock_list, *, quarter_index=None, fund_code_pool=None, is_A_stock=None): - each_statistic = FundStatistic() + +def stocks_compare(stock_list, *, market=None, quarter_index=None, fund_code_pool=None, is_A_stock=None): + """与某个季度数据进行比较 + """ if quarter_index == None: quarter_index = get_last_quarter_str(2) + print("quarter_index", quarter_index) + + last_quarter_input_file = './outcome/数据整理/strategy/all_stock_rank/' + \ + quarter_index + '.xlsx' + data_last_quarter = pd.read_excel(io=last_quarter_input_file, engine="openpyxl", dtype={ + "代码": np.str}, sheet_name=None) + + if market: + df_data_target_market = data_last_quarter.get(market) + df_data_target_market[quarter_index + '持有数量(只)'] = df_data_target_market[quarter_index + '持有数量(只)'].astype( + int) + each_statistic = FundStatistic() filter_list = [] for stock in stock_list: @@ -58,22 +74,25 @@ def stocks_compare(stock_list, *, quarter_index=None, fund_code_pool=None, is_A_ stock_holder_detail = stock[1] holder_count = stock_holder_detail.get('count') holder_asset = stock_holder_detail.get('holder_asset') - - last_quarter_holder_detail_dict = each_statistic.select_special_stock_special_quarter_info( - stock_code, - quarter_index, - fund_code_pool - ) - - last_holder_count = last_quarter_holder_detail_dict['count'] - last_holder_asset = last_quarter_holder_detail_dict['holder_asset'] - + if not market: + target_market = get_stock_market(stock_code) + print("target_market", target_market) + df_data_target_market = data_last_quarter.get(target_market) + target_loc = df_data_target_market[df_data_target_market['代码'] == stock_code] + last_holder_count = 0 + last_holder_asset = 0 + if len(target_loc) == 1: + col_target = quarter_index + '持有数量(只)' + last_holder_count = target_loc[col_target].iloc[0] + col_target = quarter_index + '持有市值(亿元)' + last_holder_asset = round(decimal.Decimal( + target_loc[col_target].iloc[0]), 4) diff_holder_count = holder_count - last_holder_count diff_holder_asset = holder_asset - last_holder_asset diff_holder_count_percent = '{:.2%}'.format( diff_holder_count / last_holder_count) if last_holder_count != 0 else "+∞" - + diff_holder_asset_percent = '{:.2%}'.format( diff_holder_asset / last_holder_asset) if last_holder_asset != 0 else "+∞" # flag = '📈' if diff_holder_count > 0 else '📉' @@ -85,14 +104,18 @@ def stocks_compare(stock_list, *, quarter_index=None, fund_code_pool=None, is_A_ flag_asset = 'up' if diff_holder_asset > 0 else 'down' if diff_holder_asset == 0: flag = '=' - + item_tuple = [stock_code, stock_name, holder_count, last_holder_count, diff_holder_count, diff_holder_count_percent, flag_count, holder_asset, last_holder_asset, diff_holder_asset, diff_holder_asset_percent, flag_asset] if is_A_stock: - industry_name_third = stock_holder_detail.get('industry_name_third') - industry_name_second = stock_holder_detail.get('industry_name_second') - industry_name_first = stock_holder_detail.get('industry_name_first') - item_tuple = [*item_tuple, industry_name_third, industry_name_second,industry_name_first ] + industry_name_third = stock_holder_detail.get( + 'industry_name_third') + industry_name_second = stock_holder_detail.get( + 'industry_name_second') + industry_name_first = stock_holder_detail.get( + 'industry_name_first') + item_tuple = [*item_tuple, industry_name_third, + industry_name_second, industry_name_first] # if diff_percent == "+∞" or not float(diff_percent.rstrip('%')) < -20: filter_list.append(item_tuple) @@ -100,34 +123,38 @@ def stocks_compare(stock_list, *, quarter_index=None, fund_code_pool=None, is_A_ return filter_list # T100权重股排名 + + def t100_stocks_rank(quarter_index=None, *, each_statistic): if quarter_index == None: - quarter_index = get_last_quarter_str() + quarter_index = get_last_quarter_str(1) last_quarter_index = get_last_quarter_str(2) output_file = './outcome/数据整理/strategy/top100_rank.xlsx' sheet_name = quarter_index + '基金重仓股T100' - columns=['代码', - '名称', quarter_index + '持有数量(只)', last_quarter_index +'持有数量(只)', '持有数量环比', '持有数量环比百分比', '持有数量升或降', quarter_index + '持有市值(亿元)', last_quarter_index + '持有市值(亿元)', '持有市值环比', '持有市值环比百分比', '持有市值升或降'] + columns = ['代码', + '名称', quarter_index + '持有数量(只)', last_quarter_index + '持有数量(只)', '持有数量环比', '持有数量环比百分比', '持有数量升或降', quarter_index + '持有市值(亿元)', last_quarter_index + '持有市值(亿元)', '持有市值环比', '持有市值环比百分比', '持有市值升或降'] stock_top_list = each_statistic.all_stock_fund_count( quarter_index=quarter_index, filter_count=80) stock_top_list = stock_top_list[:100] # 获取top100权重股 - #pprint(stock_top_list) + # pprint(stock_top_list) filter_list = stocks_compare(stock_top_list) df_filter_list = pd.DataFrame(filter_list, columns=columns) - df_filter_list.to_excel(output_file, sheet_name=sheet_name) + update_xlsx_file(output_file, df_filter_list, sheet_name) + # df_filter_list.to_excel(output_file, sheet_name=sheet_name) + -# 所有股票排名 def all_stocks_rank(each_statistic): - quarter_index = get_last_quarter_str(2) + """所有股票排名 + """ + quarter_index = get_last_quarter_str(1) print("quarter_index", quarter_index) - last_quarter_index = get_last_quarter_str(3) + last_quarter_index = get_last_quarter_str(2) sheet_name = last_quarter_index + '基金重仓股T100' - columns=['代码', - '名称', quarter_index + '持有数量(只)', last_quarter_index +'持有数量(只)', '持有数量环比', '持有数量环比百分比', '持有数量升或降', quarter_index + '持有市值(亿元)', last_quarter_index + '持有市值(亿元)', '持有市值环比', '持有市值环比百分比', '持有市值升或降'] - output_file = './outcome/数据整理/strategy/all_stock_rank/'+ quarter_index +'.xlsx' - + columns = ['代码', + '名称', quarter_index + '持有数量(只)', last_quarter_index + '持有数量(只)', '持有数量环比', '持有数量环比百分比', '持有数量升或降', quarter_index + '持有市值(亿元)', last_quarter_index + '持有市值(亿元)', '持有市值环比', '持有市值环比百分比', '持有市值升或降'] + output_file = './outcome/数据整理/strategy/all_stock_rank/' + quarter_index + '.xlsx' stock_top_list = each_statistic.all_stock_fund_count( quarter_index=quarter_index, @@ -145,38 +172,60 @@ def all_stocks_rank(each_statistic): #path = '港股' hk_stock_list.append(stock_name_code) elif bool(re.search("^\d{6}$", stock_code)): - #'A股/深证主板'、'A股/创业板'、'A股/上证主板'、'A股/科创板' - a_condition = bool(re.search("^(00(0|1|2|3)\d{3})|(30(0|1)\d{3})|(60(0|1|2|3|5)\d{3})|68(8|9)\d{3}$", stock_code)) - target_item = find_from_list_of_dict(all_a_stocks_industry_info_list, 'stock_code', stock_code) + # 'A股/深证主板'、'A股/创业板'、'A股/上证主板'、'A股/科创板' + a_condition = bool(re.search( + "^(00(0|1|2|3)\d{3})|(30(0|1)\d{3})|(60(0|1|2|3|5)\d{3})|68(8|9)\d{3}$", stock_code)) + target_item = find_from_list_of_dict( + all_a_stocks_industry_info_list, 'stock_code', stock_code) if a_condition and target_item: - print('stock_code',stock_code) - stock_name_code[1]['industry_name_first'] = target_item.get('industry_name_first') - stock_name_code[1]['industry_name_second'] = target_item.get('industry_name_second') - stock_name_code[1]['industry_name_third'] = target_item.get('industry_name_third') + stock_name_code[1]['industry_name_first'] = target_item.get( + 'industry_name_first') + stock_name_code[1]['industry_name_second'] = target_item.get( + 'industry_name_second') + stock_name_code[1]['industry_name_third'] = target_item.get( + 'industry_name_third') a_stock_list.append(stock_name_code) else: other_stock_list.append(stock_name_code) else: other_stock_list.append(stock_name_code) - a_stock_compare_list = stocks_compare(a_stock_list, quarter_index=last_quarter_index, is_A_stock=True) - hk_stock_compare_list = stocks_compare(hk_stock_list,quarter_index=last_quarter_index,) - other_stock_compare_list = stocks_compare(other_stock_list,quarter_index=last_quarter_index,) + a_market = 'A股' + hk_market = '港股' + other_market = '其他' + + a_stock_compare_list = stocks_compare( + a_stock_list, market=a_market, quarter_index=last_quarter_index, is_A_stock=True) + hk_stock_compare_list = stocks_compare( + hk_stock_list, market=hk_market, quarter_index=last_quarter_index,) + other_stock_compare_list = stocks_compare( + other_stock_list, market=other_market, quarter_index=last_quarter_index,) a_columns = [*columns, '三级行业', '二级行业', '一级行业'] + df_a_list = pd.DataFrame(a_stock_compare_list, columns=a_columns) df_hk_list = pd.DataFrame(hk_stock_compare_list, columns=columns) df_other_list = pd.DataFrame(other_stock_compare_list, columns=columns) writer = pd.ExcelWriter(output_file, engine='xlsxwriter') - df_a_list.to_excel(writer, sheet_name='A股') + df_a_list.to_excel(writer, sheet_name=a_market) - df_hk_list.to_excel(writer, sheet_name='港股') + df_hk_list.to_excel(writer, sheet_name=hk_market) - df_other_list.to_excel(writer, sheet_name='其他') + df_other_list.to_excel(writer, sheet_name=other_market) writer.save() -def all_stock_holder_detail(quarter_index, each_statistic, threshold=0): + +def all_stock_holder_detail(each_statistic, *, quarter_index=None, threshold=0): + """ 所有股票的基金持仓细节 + + Args: + each_statistic (class): 统计类 + quarter_index (str, optional): 季度字符串. Defaults to None. + threshold (int, optional): 输出门槛. Defaults to 0. + """ + if quarter_index == None: + quarter_index = get_last_quarter_str() stock_list = each_statistic.all_stock_fund_count_and_details( quarter_index=quarter_index, filter_count=threshold) @@ -199,84 +248,75 @@ def all_stock_holder_detail(quarter_index, each_statistic, threshold=0): else: print('stock_name_code', stock_name_code) hold_fund_count = stock[1]['count'] - hold_fund_list = sorted(stock[1]['fund_list'], key=lambda x: x['持有市值(亿元)'], reverse=True) + hold_fund_list = sorted( + stock[1]['fund_list'], key=lambda x: x['持有市值(亿元)'], reverse=True) df_list = pd.DataFrame(hold_fund_list) - #if stock_code == 'NTES': + # if stock_code == 'NTES': # print('stock_code', df_list) stock_name_code = stock_name_code.replace('-*', '-').replace('/', '-') path = './outcome/数据整理/stocks/' + path + '/' + stock_name_code + '.xlsx' path = path.replace('\/', '-') print("path", path) - #print('df_list--',stock_name_code, df_list) - if os.path.exists(path): - writer = pd.ExcelWriter(path, engine='openpyxl') - book = load_workbook(path) - # 表名重复,删掉,重写 - if quarter_index in book.sheetnames: - del book[quarter_index] - if len(book.sheetnames) == 0: - df_list.to_excel( - path, sheet_name=quarter_index) - continue - else: - writer.book = book - df_list.to_excel( - writer, sheet_name=quarter_index) - writer.save() - writer.close() - else: - df_list.to_excel( - path, sheet_name=quarter_index) -# 获取某些基金的十大持仓股票信息 + + update_xlsx_file(path, df_list, quarter_index) + + def get_special_fund_code_holder_stock_detail(quarter_index, each_statistic): - #基金组合信息 - fund_portfolio ={ + """ 获取某些基金的十大持仓股票信息 + """ + # 基金组合信息 + fund_portfolio = { '001811': { 'name': '中欧明睿新常态混合A', - 'position' : 0.2 + 'position': 0.2 }, '001705': { 'name': '泓德战略转型股票', - 'position' : 0.2 + 'position': 0.2 }, '163415': { 'name': '兴全商业模式优选混合', - 'position' : 0.2 + 'position': 0.2 }, '001043': { 'name': '工银美丽城镇主题股票A', - 'position' : 0.2 + 'position': 0.2 }, '000547': { 'name': '建信健康民生混合', - 'position' : 0.2 + 'position': 0.2 }, - '450001': { + '450001': { 'name': '国富中国收益混合', - 'position' : 0.2 + 'position': 0.2 }, } fund_code_pool = list(fund_portfolio.keys()) - holder_stock_industry_list = each_statistic.summary_special_funds_stock_detail(fund_code_pool, quarter_index) + holder_stock_industry_list = each_statistic.summary_special_funds_stock_detail( + fund_code_pool, quarter_index) path = './outcome/数据整理/funds/' + '/' + '高分权益基金组合十大持仓明细' + '.xlsx' - columns=['基金代码','基金名称', '基金类型', '基金经理', '基金总资产(亿元)', '基金股票总仓位', '十大股票仓位', '股票代码', '股票名称', '所占仓位', '所处仓位排名', '三级行业', '二级行业', '一级行业'] + columns = ['基金代码', '基金名称', '基金类型', '基金经理', '基金总资产(亿元)', '基金股票总仓位', + '十大股票仓位', '股票代码', '股票名称', '所占仓位', '所处仓位排名', '三级行业', '二级行业', '一级行业'] df_a_list = pd.DataFrame(holder_stock_industry_list, columns=columns) writer = pd.ExcelWriter(path, engine='xlsxwriter') df_a_list.to_excel(writer, sheet_name='十大持仓明细--' + quarter_index) writer.save() + + if __name__ == '__main__': each_statistic = FundStatistic() - quarter_index = "2021-Q1" + # quarter_index = "2021-Q2" + # read_excel(path, 'A股', '601888', '2021-Q1持有市值(亿元)') # 所有股票的基金持仓细节 - #all_stock_holder_detail(quarter_index, each_statistic) + # all_stock_holder_detail(each_statistic) # 获取所有股票排名,按股票市场分类输出 - all_stocks_rank(each_statistic) + # all_stocks_rank(each_statistic) # 获取Top100股票排名 - #t100_stocks_rank(each_statistic=each_statistic) - + t100_stocks_rank(each_statistic=each_statistic) + # 获取某些基金的十大持仓股票信息 #get_special_fund_code_holder_stock_detail(quarter_index, each_statistic) diff --git a/src/fund_strategy.py b/src/fund_strategy.py index 24ff227..61fbc0a 100644 --- a/src/fund_strategy.py +++ b/src/fund_strategy.py @@ -7,17 +7,23 @@ Author: luxuemin2108@gmail.com ----- Copyright (c) 2021 Camel Lu ''' + +import os from sql_model.fund_query import FundQuery import pandas as pd -from openpyxl import load_workbook,Workbook +from openpyxl import load_workbook, Workbook from openpyxl.utils import get_column_letter from utils.index import get_last_quarter_str from pprint import pprint -# 输出高分基金 -def output_high_score_funds(each_query,quarter_index=None): + +def output_high_score_funds(each_query, quarter_index=None): + """ + 输出高分基金 + """ if quarter_index == None: - quarter_index = get_last_quarter_str() + quarter_index = get_last_quarter_str() + print("quarter_index", quarter_index) high_score_funds = each_query.select_high_score_funds( quarter_index=quarter_index) columns_bk = ['代码', '名称', '季度', '总资产', '现任基金经理管理起始时间', '投资风格', '三月最大回撤', '六月最大回撤', '夏普比率', '阿尔法系数', '贝塔系数', @@ -34,27 +40,28 @@ def output_high_score_funds(each_query,quarter_index=None): # df_high_score_funds.to_excel(writer, sheet_name=quarter_index) # df2.to_excel(writer, sheet_name='Sheet2') path = './output/xlsx/high-score-funds_log.xlsx' - writer = pd.ExcelWriter(path, engine='openpyxl') - book = load_workbook(path) - writer.book = book - df_high_score_funds.to_excel(writer, sheet_name=quarter_index) - writer.save() - writer.close() + if os.path.exists(path): + writer = pd.ExcelWriter(path, engine='openpyxl') + book = load_workbook(path) + writer.book = book + # 表名重复,删掉,重写 + if quarter_index in book.sheetnames: + del book[quarter_index] -if __name__ == '__main__': - #each_query = FundQuery() - #quarter_index = '2020-Q4' - #output_high_score_funds() - dest_filename = 'empty_book.xlsx' - #wb = Workbook(dest_filename) - wb = load_workbook(filename = 'empty_book.xlsx') - ws = wb.active - print("ws", ws) - #ws.merge_cells('A2:D2') - ws.merge_cells(start_row=2, start_column=1, end_row=4, end_column=4) - ws.merge_cells('J17:J20') - ws.column_dimensions.group('A','D', hidden=True) - ws.row_dimensions.group(1,10, hidden=True) - wb.save(dest_filename) - #ws.unmerge_cells('A2:D2') + if len(book.sheetnames) == 0: + df_high_score_funds.to_excel( + path, sheet_name=quarter_index) + else: + writer.book = book + df_high_score_funds.to_excel( + writer, sheet_name=quarter_index) + writer.save() + writer.close() + else: + df_high_score_funds.to_excel( + path, sheet_name=quarter_index) + +if __name__ == '__main__': + each_query = FundQuery() + output_high_score_funds(each_query) diff --git a/src/sql_model/fund_query.py b/src/sql_model/fund_query.py index 05be90e..e6d1797 100644 --- a/src/sql_model/fund_query.py +++ b/src/sql_model/fund_query.py @@ -111,7 +111,6 @@ class FundQuery(BaseQuery): sql, [self.quarter_date, self.quarter_index, page_start, page_limit]) # 执行sql语句 return self.cursor.fetchall() # 获取查询的所有记录 - @lock_process def select_high_score_funds(self, *, quarter_index=None): """获取高分基金池 @@ -262,8 +261,8 @@ class FundQuery(BaseQuery): return () list_str = ', '.join(fund_code_pool) fund_code_list_sql = "AND t.fund_code IN (" + list_str + ")" - sql_query_quarter = "SELECT t.fund_code, t.fund_name, u.total_asset, t.stock_position_total, " + stock_sql_join + \ - " FROM fund_morning_stock_info as t LEFT JOIN fund_morning_quarter as u ON u.fund_code = t.fund_code AND u.quarter_index = t.quarter_index WHERE u.quarter_index = %s AND t.stock_position_total > 20 " + \ + sql_query_quarter = "SELECT t.fund_code, a.fund_name, u.total_asset, t.stock_position_total, " + stock_sql_join + \ + " FROM fund_morning_stock_info as t LEFT JOIN fund_morning_quarter as u ON u.fund_code = t.fund_code AND u.quarter_index = t.quarter_index LEFT JOIN fund_morning_base as a ON a.fund_code = t.fund_code WHERE u.quarter_index = %s AND t.stock_position_total > 20 " + \ fund_code_list_sql + \ ";" # 大于20%股票持仓基金 if quarter_index == None: @@ -299,6 +298,9 @@ class FundQuery(BaseQuery): # def select_special_stock_special_quarter_info(self, stock_code, quarter_index=None, fund_code_pool=None): + """ + 即将废弃 + """ if quarter_index == None: quarter_index = self.quarter_index diff --git a/src/utils/index.py b/src/utils/index.py index 05185c6..6521efc 100644 --- a/src/utils/index.py +++ b/src/utils/index.py @@ -2,8 +2,13 @@ import time import datetime import os + +import re from threading import Thread, Lock +import pandas as pd +from openpyxl import load_workbook + def lock_process(func): lock = Lock() @@ -125,6 +130,38 @@ def find_from_list_of_dict(dict_list, match_key, value): return res +def get_stock_market(stock_code): + if bool(re.search("^\d{5}$", stock_code)): + return '港股' + elif bool(re.search("^\d{6}$", stock_code)) and bool(re.search( + "^(00(0|1|2|3)\d{3})|(30(0|1)\d{3})|(60(0|1|2|3|5)\d{3})|68(8|9)\d{3}$", stock_code)): + return 'A股' + else: + return '其他' + + +def update_xlsx_file(path, df_data, sheet_name): + if os.path.exists(path): + writer = pd.ExcelWriter(path, engine='openpyxl') + book = load_workbook(path) + # 表名重复,删掉,重写 + if sheet_name in book.sheetnames: + del book[sheet_name] + if len(book.sheetnames) == 0: + df_data.to_excel( + path, sheet_name=sheet_name) + return + else: + writer.book = book + df_data.to_excel( + writer, sheet_name=sheet_name) + writer.save() + writer.close() + else: + df_data.to_excel( + path, sheet_name=sheet_name) + + def bootstrap_thread(target_fn, total, thread_count=2): threaders = [] start_time = time.time()