diff --git a/src/acquire_fund_quarter.py b/src/acquire_fund_quarter.py index eaae8f4..553a9b4 100644 --- a/src/acquire_fund_quarter.py +++ b/src/acquire_fund_quarter.py @@ -15,7 +15,7 @@ from time import sleep, time from fund_info.api import FundApier from fund_info.crawler import FundSpider -from fund_info.csv import FundCSV +from fund_info.fund_csv import FundCSV from lib.mysnowflake import IdWorker from models.manager import Manager, ManagerAssoc from sql_model.fund_insert import FundInsert @@ -31,11 +31,14 @@ def get_total_asset(fund_code, platform): each_fund = FundApier(fund_code, end_date='2021-05-07', platform=platform) total_asset = each_fund.get_total_asset() # 如果在爱基金平台找不到,则到展恒基金找 - if total_asset == None and platform == 'ai_fund': - print("fund_code", total_asset, fund_code) + if total_asset == None and platform != 'zh_fund': each_fund = FundApier( fund_code, end_date='2021-05-10', platform='zh_fund') total_asset = each_fund.get_total_asset() + if total_asset == None and platform != 'ai_fund': + each_fund = FundApier( + fund_code, end_date='2021-05-10', platform='ai_fund') + total_asset = each_fund.get_total_asset() return total_asset def acquire_fund_quarter(): @@ -182,15 +185,19 @@ def acquire_fund_quarter(): similar_name = each_fund.fund_name[0:-1] results = each_fund_query.select_similar_fund( similar_name) # 获取查询的所有记录 - platform = 'zh_fund' if '封闭' in similar_name else 'ai_fund' + # platform = 'zh_fund' if '封闭' in similar_name else 'ai_fund' + platform = 'danjuan' for i in range(0, len(results)): item = results[i] item_code = item[0] if item_code == each_fund.fund_code: continue - print("item_code", item_code, platform ) + print("item_code", item_code, platform) total_asset = get_total_asset(item_code, platform) - init_total_asset = init_total_asset - total_asset + if total_asset != None: + init_total_asset = init_total_asset - total_asset + else: + print("total_asset is None", item_code, item[2]) manager_assoc_data = { 'quarter_index': quarter_index, 'manager_start_date': manager_item['manager_start_date'], @@ -225,7 +232,7 @@ def acquire_fund_quarter(): chrome_driver.close() raise BaseException chrome_driver.close() - thread_count = 4 + thread_count = 6 # for count in range(6): total_start_time = time() diff --git a/src/fund_info/api.py b/src/fund_info/api.py index 281dd77..d42f9e1 100644 --- a/src/fund_info/api.py +++ b/src/fund_info/api.py @@ -16,8 +16,16 @@ from pprint import pprint sys.path.append('../') sys.path.append(os.getcwd() + '/src') from utils.file_op import write_fund_json_data +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry +session = requests.Session() +retry = Retry(connect=3, backoff_factor=0.5) +adapter = HTTPAdapter(max_retries=retry) +session.mount('http://', adapter) +session.mount('https://', adapter) + class FundApier: def __init__(self, code, *, end_date=None, platform='ai_fund'): self.fund_code = code @@ -35,7 +43,14 @@ class FundApier: fund_code=code, end_date=self.end_date ) - + def get_client_headers(self, *, referer="https://danjuanfunds.com"): + headers = { + 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8', + 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36', + 'Origin': referer, + 'Referer': referer, + } + return headers def get_total_asset(self): if self.base_info_is_exist(): return self.get_asset_from_json() @@ -43,6 +58,8 @@ class FundApier: return self.get_base_info_ai() elif self.platform == 'zh_fund': return self.get_base_info_zh() + elif self.platform == 'danjuan': + return self.get_base_info_from_danjuan() def get_asset_from_json(self): with open(self.file_path) as json_file: @@ -59,9 +76,9 @@ class FundApier: def get_base_info_ai(self): url = "http://fund.10jqka.com.cn/data/client/myfund/{0}".format( self.fund_code) - - res = requests.get(url) # 自动编码 - time.sleep(1) + headers = self.get_client_headers(referer="https://fund.10jqka.com.cn") + res = session.get(url, headers=headers) # 自动编码 + time.sleep(2) try: if res.status_code == 200: res_json = res.json() @@ -91,20 +108,17 @@ class FundApier: def get_base_info_zh(self): url = "https://www.myfund.com/webinterface/Bamboo.ashx?command={0}".format( 'fundInfoHead_NEW') - headers = { - 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8' - } + headers = self.get_client_headers(referer="https://www.myfund.com") payload = { 'fundcode': self.fund_code, } - res = requests.post(url, headers=headers, data=payload) + res = session.post(url, headers=headers, data=payload) res.encoding = "utf-8" time.sleep(1) try: if res.status_code == 200: res_json = res.json() fund_scope = res_json.get('FundScope') - pprint(res_json) if res_json.get('Msg') == 'OK' and fund_scope != None: end_date = res_json.get('DealDate') total_asset = fund_scope[0:-1] @@ -128,12 +142,11 @@ class FundApier: def get_analyse_info_zh(self): url = "https://www.myfund.com/webinterface/Bamboo.ashx?command={0}".format( 'singlefundAnalyse') - headers = { - 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8' - } + headers = self.get_client_headers(referer="https://www.myfund.com") payload = { 'fundcode': self.fund_code, } + # res = requests.post(url, headers=headers, data=payload, verify=False) res = requests.post(url, headers=headers, data=payload) # print("res", res) res.encoding = "utf-8" @@ -157,6 +170,36 @@ class FundApier: print('code:3', self.fund_code) raise('中断') + def get_base_info_from_danjuan(self): + url = "https://danjuanfunds.com/djapi/fund/{0}".format(self.fund_code) + headers = self.get_client_headers() + res = session.get(url, headers=headers) + try: + if res.status_code == 200: + res_json = res.json() + if res_json.get('result_code') == 0: + base_info = res.json().get('data') + + total_asset = base_info.get('totshare') + if(total_asset.endswith('万')): + total_asset = round(float(total_asset[0:-1]) / 10000, 3) + elif(total_asset.endswith('亿')): + total_asset = float(total_asset[0:-1]) + else: + print(total_asset, "not a number") + return + self.total_asset = total_asset + return self.total_asset + else: + pprint(res_json) + print('code:1', self.fund_code) + else: + pprint(res.content) + print('code:2', self.fund_code) + raise('中断') + except: + print('code:3', self.fund_code) + raise('中断') def write_info_in_json(self, end_date, json_data): filename = '{fund_code}{end_date}-base.json'.format( fund_code=self.fund_code, @@ -169,6 +212,6 @@ class FundApier: if __name__ == '__main__': - fund_api = FundApier('000421', end_date='2021-05-31',) - fund_api.get_analyse_info_zh() + fund_api = FundApier('011140', end_date='2021-05-31',) + fund_api.get_base_info_from_danjuan() # print("fund_api", fund_api) diff --git a/src/fund_info/csv.py b/src/fund_info/fund_csv.py similarity index 100% rename from src/fund_info/csv.py rename to src/fund_info/fund_csv.py diff --git a/src/sql_model/fund_query.py b/src/sql_model/fund_query.py index bf5a7a9..9cb9d59 100644 --- a/src/sql_model/fund_query.py +++ b/src/sql_model/fund_query.py @@ -77,9 +77,14 @@ class FundQuery(BaseQuery): def get_select_quarter_condition(self): condition = "WHERE t.fund_cat NOT LIKE '%%货币%%' \ - AND t.fund_cat NOT IN ('利率债', '利率债(封闭)', '短债', '短债基金', '短债型', '短债型(封闭)', '短债基金(封闭)',\ - '纯债', '纯债基金', '纯债(封闭)', '纯债基金(封闭)',\ - '普通债券型', '普通债券型(封闭)', '普通债券', '普通债券(封闭)', '普通债券型基金','普通债券型基金(封闭)', '信用债', '信用债(封闭)','目标日期', '商品 - 贵金属', '商品 - 其它' ) \ + AND t.fund_cat NOT LIKE '%%纯债%%' \ + AND t.fund_cat NOT LIKE '%%普通债券%%' \ + AND t.fund_cat NOT LIKE '%%短债%%' \ + AND t.fund_cat NOT LIKE '%%利率债%%' \ + AND t.fund_cat NOT LIKE '%%信用债%%' \ + AND t.fund_cat NOT LIKE '%%商品%%' \ + AND t.fund_cat NOT LIKE '%%环球债券%%' \ + AND t.fund_cat NOT IN ('目标日期','亚洲高收益债券') \ AND t.found_date <= %s \ AND t.is_archive = 0 \ AND t.fund_code NOT IN( SELECT fund_code FROM fund_morning_quarter as b \ diff --git a/src/sync_fund_base.py b/src/sync_fund_base.py index 022e072..3a300c6 100644 --- a/src/sync_fund_base.py +++ b/src/sync_fund_base.py @@ -135,8 +135,8 @@ def further_complete_base_info(): } fund_base = FundBase(**base_dict) fund_base.upsert() - page_start = page_start + page_limit print('page_start', page_start) + page_start = page_start + page_limit chrome_driver.close() bootstrap_thread(crawlData, len(all_funds), 3) if __name__ == '__main__': @@ -144,4 +144,3 @@ if __name__ == '__main__': page_index = 1 # sync_fund_base(page_index) further_complete_base_info() - \ No newline at end of file