chore: add get_season_index utils && comment code

main
jackluson 4 years ago
parent 781268a790
commit 8d14757cea

1
.gitignore vendored

@ -6,6 +6,7 @@ code-record/*
*$py.class
.vscode/
*fund_morning_star.csv
abnormal
# C extensions

@ -19,7 +19,7 @@ from pprint import pprint
import pymysql
import pandas
connect = pymysql.connect(host='127.0.0.1', user='root',
password='rootroot', db='fund_work', charset='utf8')
password='xxxxx', db='fund_work', charset='utf8')
cursor = connect.cursor()
lock = Lock()
@ -123,6 +123,7 @@ if __name__ == '__main__':
each_fund = FundSpider(
record[0], record[1], record[2], chrome_driver, morning_cookies)
is_normal = each_fund.go_fund_url()
# Check whether the fund detail page could be opened; if not, record the fund in the CSV and skip the rest of this iteration
if is_normal == False:
lock.acquire()
error_funds.append(each_fund.fund_code)
@ -139,8 +140,10 @@ if __name__ == '__main__':
each_fund.get_fund_manager_info() # 基金经理模块
each_fund.get_fund_morning_rating() # 基金晨星评级
each_fund.get_fund_qt_rating() # 基金风险评级
# 判断是否有股票持仓,有则爬取
if each_fund.stock_position['total'] != '0.00' and each_fund.total_asset != None:
each_fund.get_asset_composition_info()
# 爬取过程中是否有异常
if each_fund._is_trigger_catch == True:
lock.acquire()
fund_infos = [each_fund.fund_code, each_fund.morning_star_code,
@ -171,6 +174,7 @@ if __name__ == '__main__':
tuple(manager_dict.values()))
connect.commit()
lock.release()
# 季度信息 TODO: 对比数据更新时间field
season_dict = {
'id': snow_flake_id,
'season_number': each_fund.season_number,

@ -18,7 +18,7 @@ from selenium.common.exceptions import NoSuchElementException
class FundSpider:
# 初始化定义,利用基金代码、基金名称进行唯一化
def __init__(self, code, namecode, name, chrome_driver, morning_cookies):
self.season_number = '2021-s1'
self.season_number = '2021-s1' # TODO: get season_number by current time
self.fund_code = code # 基金代码,需要初始化赋值
self.fund_name = name # 基金名称,需要初始化赋值
self.morning_star_code = namecode # 基金编码,晨星网特有,需要建立索引表
@ -247,7 +247,7 @@ class FundSpider:
file_name = './abnormal/qt_rating-' + self.fund_code + "-no_such_element.png"
def get_fund_season_info(self):
# 总资产
# 总资产 TODO: 增加一个数据更新时间field
self.total_asset = self.get_element_text_by_class_name(
"asset", 'qt_base')
# 投资风格
@ -259,7 +259,7 @@ class FundSpider:
# 最差六个月回报
self.june_month_retracement = self.get_element_text_by_class_name(
"r6", 'qt_worst')
# 获取股票总仓位、前十大持仓、债券总仓位、前五大持仓
# 获取股票总仓位、前十大持仓、债券总仓位、前五大持仓 TODO: 增加一个数据更新时间field
total = self.get_element_text_by_class_name(
"stock", 'qt_asset')
self.stock_position["total"] = total if total != None else '0.00'
@ -291,6 +291,7 @@ class FundSpider:
# 获取标准差
# standard_deviation = self._chrome_driver.find_element_by_id(
# "qt_risk").find_element_by_xpath('li[16]').text
# TODO: 增加一个数据更新时间field
standard_deviation = self.get_element_text_by_xpath(
'li[16]', 'qt_risk')
if standard_deviation != None:

@ -1,6 +1,7 @@
from urllib import parse
import time
import datetime
import os
@ -151,3 +152,20 @@ def parse_csv(datafile):
counter += 1
return data
def get_season_index(input_date):
    """Return the season (quarter) index, 1 through 4, for a month-day string.

    Seasons end on 03-31, 06-30, 09-30 and 12-31 (inclusive), so any date
    in January-March maps to 1, April-June to 2, July-September to 3 and
    October-December to 4.

    Args:
        input_date: Date in ``'MM-DD'`` form, e.g. ``'06-30'``.

    Returns:
        int: The 1-based season index of ``input_date``.

    Raises:
        ValueError: If ``input_date`` is not a valid ``'MM-DD'`` date.
    """
    # Validate against a fixed leap year instead of the current year, so that
    # '02-29' is always accepted and the result never depends on when the
    # function happens to be called.
    parsed_date = datetime.datetime.strptime(
        '2000-' + input_date, '%Y-%m-%d')
    # Each season spans exactly three calendar months, so the index follows
    # directly from the month number.
    return (parsed_date.month - 1) // 3 + 1

Loading…
Cancel
Save