feat: add danjuan api

main
jackluson 2 years ago
parent 31bafab045
commit 57de7dfe2e

@ -15,7 +15,7 @@ from time import sleep, time
from fund_info.api import FundApier from fund_info.api import FundApier
from fund_info.crawler import FundSpider from fund_info.crawler import FundSpider
from fund_info.csv import FundCSV from fund_info.fund_csv import FundCSV
from lib.mysnowflake import IdWorker from lib.mysnowflake import IdWorker
from models.manager import Manager, ManagerAssoc from models.manager import Manager, ManagerAssoc
from sql_model.fund_insert import FundInsert from sql_model.fund_insert import FundInsert
@ -31,11 +31,14 @@ def get_total_asset(fund_code, platform):
each_fund = FundApier(fund_code, end_date='2021-05-07', platform=platform) each_fund = FundApier(fund_code, end_date='2021-05-07', platform=platform)
total_asset = each_fund.get_total_asset() total_asset = each_fund.get_total_asset()
# 如果在爱基金平台找不到,则到展恒基金找 # 如果在爱基金平台找不到,则到展恒基金找
if total_asset == None and platform == 'ai_fund': if total_asset == None and platform != 'zh_fund':
print("fund_code", total_asset, fund_code)
each_fund = FundApier( each_fund = FundApier(
fund_code, end_date='2021-05-10', platform='zh_fund') fund_code, end_date='2021-05-10', platform='zh_fund')
total_asset = each_fund.get_total_asset() total_asset = each_fund.get_total_asset()
if total_asset == None and platform != 'ai_fund':
each_fund = FundApier(
fund_code, end_date='2021-05-10', platform='ai_fund')
total_asset = each_fund.get_total_asset()
return total_asset return total_asset
def acquire_fund_quarter(): def acquire_fund_quarter():
@ -182,15 +185,19 @@ def acquire_fund_quarter():
similar_name = each_fund.fund_name[0:-1] similar_name = each_fund.fund_name[0:-1]
results = each_fund_query.select_similar_fund( results = each_fund_query.select_similar_fund(
similar_name) # 获取查询的所有记录 similar_name) # 获取查询的所有记录
platform = 'zh_fund' if '封闭' in similar_name else 'ai_fund' # platform = 'zh_fund' if '封闭' in similar_name else 'ai_fund'
platform = 'danjuan'
for i in range(0, len(results)): for i in range(0, len(results)):
item = results[i] item = results[i]
item_code = item[0] item_code = item[0]
if item_code == each_fund.fund_code: if item_code == each_fund.fund_code:
continue continue
print("item_code", item_code, platform ) print("item_code", item_code, platform)
total_asset = get_total_asset(item_code, platform) total_asset = get_total_asset(item_code, platform)
init_total_asset = init_total_asset - total_asset if total_asset != None:
init_total_asset = init_total_asset - total_asset
else:
print("total_asset is None", item_code, item[2])
manager_assoc_data = { manager_assoc_data = {
'quarter_index': quarter_index, 'quarter_index': quarter_index,
'manager_start_date': manager_item['manager_start_date'], 'manager_start_date': manager_item['manager_start_date'],
@ -225,7 +232,7 @@ def acquire_fund_quarter():
chrome_driver.close() chrome_driver.close()
raise BaseException raise BaseException
chrome_driver.close() chrome_driver.close()
thread_count = 4 thread_count = 6
# for count in range(6): # for count in range(6):
total_start_time = time() total_start_time = time()

@ -16,8 +16,16 @@ from pprint import pprint
sys.path.append('../') sys.path.append('../')
sys.path.append(os.getcwd() + '/src') sys.path.append(os.getcwd() + '/src')
from utils.file_op import write_fund_json_data from utils.file_op import write_fund_json_data
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
# Module-level HTTP session; requests issued via `session.get` / `session.post`
# in this module go through it and therefore share the retry policy below.
session = requests.Session()
# Retry policy handed to the transport adapter: `connect=3` caps retries of
# connection-related errors at three, and `backoff_factor=0.5` makes urllib3
# sleep for an increasing delay between attempts instead of retrying at once.
retry = Retry(connect=3, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)
# Mount the same adapter for both schemes so plain-HTTP and HTTPS endpoints
# are retried identically.
session.mount('http://', adapter)
session.mount('https://', adapter)
class FundApier: class FundApier:
def __init__(self, code, *, end_date=None, platform='ai_fund'): def __init__(self, code, *, end_date=None, platform='ai_fund'):
self.fund_code = code self.fund_code = code
@ -35,7 +43,14 @@ class FundApier:
fund_code=code, fund_code=code,
end_date=self.end_date end_date=self.end_date
) )
def get_client_headers(self, *, referer="https://danjuanfunds.com"):
    """Build the browser-like HTTP headers sent with fund API requests.

    Args:
        referer: Site root used for both the ``Origin`` and ``Referer``
            headers (keyword-only).

    Returns:
        A new dict containing ``Content-Type``, ``User-Agent``, ``Origin``
        and ``Referer``.
    """
    # Adjacent literals are concatenated at compile time into one
    # desktop-Chrome user-agent string.
    user_agent = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/91.0.4472.106 Safari/537.36'
    )
    return dict([
        ('Content-Type', 'application/x-www-form-urlencoded;charset=utf-8'),
        ('User-Agent', user_agent),
        ('Origin', referer),
        ('Referer', referer),
    ])
def get_total_asset(self): def get_total_asset(self):
if self.base_info_is_exist(): if self.base_info_is_exist():
return self.get_asset_from_json() return self.get_asset_from_json()
@ -43,6 +58,8 @@ class FundApier:
return self.get_base_info_ai() return self.get_base_info_ai()
elif self.platform == 'zh_fund': elif self.platform == 'zh_fund':
return self.get_base_info_zh() return self.get_base_info_zh()
elif self.platform == 'danjuan':
return self.get_base_info_from_danjuan()
def get_asset_from_json(self): def get_asset_from_json(self):
with open(self.file_path) as json_file: with open(self.file_path) as json_file:
@ -59,9 +76,9 @@ class FundApier:
def get_base_info_ai(self): def get_base_info_ai(self):
url = "http://fund.10jqka.com.cn/data/client/myfund/{0}".format( url = "http://fund.10jqka.com.cn/data/client/myfund/{0}".format(
self.fund_code) self.fund_code)
headers = self.get_client_headers(referer="https://fund.10jqka.com.cn")
res = requests.get(url) # 自动编码 res = session.get(url, headers=headers) # 自动编码
time.sleep(1) time.sleep(2)
try: try:
if res.status_code == 200: if res.status_code == 200:
res_json = res.json() res_json = res.json()
@ -91,20 +108,17 @@ class FundApier:
def get_base_info_zh(self): def get_base_info_zh(self):
url = "https://www.myfund.com/webinterface/Bamboo.ashx?command={0}".format( url = "https://www.myfund.com/webinterface/Bamboo.ashx?command={0}".format(
'fundInfoHead_NEW') 'fundInfoHead_NEW')
headers = { headers = self.get_client_headers(referer="https://www.myfund.com")
'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8'
}
payload = { payload = {
'fundcode': self.fund_code, 'fundcode': self.fund_code,
} }
res = requests.post(url, headers=headers, data=payload) res = session.post(url, headers=headers, data=payload)
res.encoding = "utf-8" res.encoding = "utf-8"
time.sleep(1) time.sleep(1)
try: try:
if res.status_code == 200: if res.status_code == 200:
res_json = res.json() res_json = res.json()
fund_scope = res_json.get('FundScope') fund_scope = res_json.get('FundScope')
pprint(res_json)
if res_json.get('Msg') == 'OK' and fund_scope != None: if res_json.get('Msg') == 'OK' and fund_scope != None:
end_date = res_json.get('DealDate') end_date = res_json.get('DealDate')
total_asset = fund_scope[0:-1] total_asset = fund_scope[0:-1]
@ -128,12 +142,11 @@ class FundApier:
def get_analyse_info_zh(self): def get_analyse_info_zh(self):
url = "https://www.myfund.com/webinterface/Bamboo.ashx?command={0}".format( url = "https://www.myfund.com/webinterface/Bamboo.ashx?command={0}".format(
'singlefundAnalyse') 'singlefundAnalyse')
headers = { headers = self.get_client_headers(referer="https://www.myfund.com")
'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8'
}
payload = { payload = {
'fundcode': self.fund_code, 'fundcode': self.fund_code,
} }
# res = requests.post(url, headers=headers, data=payload, verify=False)
res = requests.post(url, headers=headers, data=payload) res = requests.post(url, headers=headers, data=payload)
# print("res", res) # print("res", res)
res.encoding = "utf-8" res.encoding = "utf-8"
@ -157,6 +170,36 @@ class FundApier:
print('code:3', self.fund_code) print('code:3', self.fund_code)
raise('中断') raise('中断')
def get_base_info_from_danjuan(self):
    """Fetch this fund's ``totshare`` figure from the danjuanfunds.com API.

    Sends a GET to ``https://danjuanfunds.com/djapi/fund/<fund_code>`` and,
    when the payload's ``result_code`` is 0, reads ``data.totshare``: a
    string made of a number followed by a one-character unit. The number is
    normalised to units of 亿: a value suffixed with 万 is divided by 10000
    and rounded to 3 decimals, a value suffixed with 亿 is used unchanged.

    Returns:
        The normalised value as a float (also stored on
        ``self.total_asset``), or ``None`` when the API reports a non-zero
        ``result_code`` or the unit suffix is not recognised.

    Raises:
        RuntimeError: if the HTTP status is not 200, or the response body
            cannot be parsed into the expected shape.
    """
    url = "https://danjuanfunds.com/djapi/fund/{0}".format(self.fund_code)
    headers = self.get_client_headers()
    res = session.get(url, headers=headers)
    # Checked outside the try block so this failure is reported once
    # instead of being re-caught by the parsing handler below.
    if res.status_code != 200:
        pprint(res.content)
        print('code:2', self.fund_code)
        raise RuntimeError('中断')
    try:
        res_json = res.json()
        if res_json.get('result_code') != 0:
            pprint(res_json)
            print('code:1', self.fund_code)
            return None
        total_asset = res_json.get('data').get('totshare')
        # The suffix must be tested against the real unit character:
        # endswith('') is true for every string, which would send values
        # already expressed in 亿 through the 万 -> 亿 conversion.
        if total_asset.endswith('万'):
            total_asset = round(float(total_asset[0:-1]) / 10000, 3)
        elif total_asset.endswith('亿'):
            total_asset = float(total_asset[0:-1])
        else:
            print(total_asset, "not a number")
            return None
    except (AttributeError, TypeError, ValueError) as err:
        # Invalid JSON, a missing `data`/`totshare` field, or a
        # non-numeric prefix all end up here.
        print('code:3', self.fund_code)
        raise RuntimeError('中断') from err
    self.total_asset = total_asset
    return self.total_asset
def write_info_in_json(self, end_date, json_data): def write_info_in_json(self, end_date, json_data):
filename = '{fund_code}{end_date}-base.json'.format( filename = '{fund_code}{end_date}-base.json'.format(
fund_code=self.fund_code, fund_code=self.fund_code,
@ -169,6 +212,6 @@ class FundApier:
if __name__ == '__main__': if __name__ == '__main__':
fund_api = FundApier('000421', end_date='2021-05-31',) fund_api = FundApier('011140', end_date='2021-05-31',)
fund_api.get_analyse_info_zh() fund_api.get_base_info_from_danjuan()
# print("fund_api", fund_api) # print("fund_api", fund_api)

@ -77,9 +77,14 @@ class FundQuery(BaseQuery):
def get_select_quarter_condition(self): def get_select_quarter_condition(self):
condition = "WHERE t.fund_cat NOT LIKE '%%货币%%' \ condition = "WHERE t.fund_cat NOT LIKE '%%货币%%' \
AND t.fund_cat NOT IN ('利率债', '利率债(封闭)', '短债', '短债基金', '短债型', '短债型(封闭)', '短债基金(封闭)',\ AND t.fund_cat NOT LIKE '%%纯债%%' \
'纯债', '纯债基金', '纯债(封闭)', '纯债基金(封闭)',\ AND t.fund_cat NOT LIKE '%%普通债券%%' \
'普通债券型', '普通债券型(封闭)', '普通债券', '普通债券(封闭)', '普通债券型基金','普通债券型基金(封闭)', '信用债', '信用债(封闭)','目标日期', '商品 - 贵金属', '商品 - 其它' ) \ AND t.fund_cat NOT LIKE '%%短债%%' \
AND t.fund_cat NOT LIKE '%%利率债%%' \
AND t.fund_cat NOT LIKE '%%信用债%%' \
AND t.fund_cat NOT LIKE '%%商品%%' \
AND t.fund_cat NOT LIKE '%%环球债券%%' \
AND t.fund_cat NOT IN ('目标日期','亚洲高收益债券') \
AND t.found_date <= %s \ AND t.found_date <= %s \
AND t.is_archive = 0 \ AND t.is_archive = 0 \
AND t.fund_code NOT IN( SELECT fund_code FROM fund_morning_quarter as b \ AND t.fund_code NOT IN( SELECT fund_code FROM fund_morning_quarter as b \

@ -135,8 +135,8 @@ def further_complete_base_info():
} }
fund_base = FundBase(**base_dict) fund_base = FundBase(**base_dict)
fund_base.upsert() fund_base.upsert()
page_start = page_start + page_limit
print('page_start', page_start) print('page_start', page_start)
page_start = page_start + page_limit
chrome_driver.close() chrome_driver.close()
bootstrap_thread(crawlData, len(all_funds), 3) bootstrap_thread(crawlData, len(all_funds), 3)
if __name__ == '__main__': if __name__ == '__main__':
@ -144,4 +144,3 @@ if __name__ == '__main__':
page_index = 1 page_index = 1
# sync_fund_base(page_index) # sync_fund_base(page_index)
further_complete_base_info() further_complete_base_info()
Loading…
Cancel
Save