fix:🐛 fixbugs

main
jackluson 4 years ago
parent 9858170c36
commit 2ac624ff6c

@ -47,7 +47,7 @@ if __name__ == '__main__':
IdWorker = IdWorker()
page_limit = 10
record_total = count[0]
page_start = 3890
page_start = 0
error_funds = ['005086'] # 一些异常的基金详情页如果发现记录该基金的code
# 遍历从基金列表的单支基金
while(page_start < record_total):
@ -58,14 +58,15 @@ if __name__ == '__main__':
for record in results:
each_fund = FundInfo(
record[0], record[1], record[2], chrome_driver, morning_cookies)
# 从天天基金网上更新信息
# each_fund.update_fund_info_by_tiantian()
# 从晨星网上更新信息
is_normal = each_fund.get_fund_detail_info()
if is_normal == False or each_fund.found_date == '-':
is_normal = each_fund.go_fund_url()
if is_normal == False:
error_funds.append(each_fund.fund_code)
continue
each_fund.get_fund_base_info()
if each_fund.found_date == '-':
error_funds.append(each_fund.fund_code)
continue
# each_fund.get_asset_composition_info()
# 拼接sql需要的数据
snow_flake_id = IdWorker.get_id()
base_dict = {

@ -10,7 +10,6 @@ Copyright (c) 2020 Camel Lu
import re
from time import sleep
from IOFile import crawl_html
from bs4 import BeautifulSoup
from utils import parse_cookiestr, set_cookies, login_site
@ -41,6 +40,40 @@ class FundInfo:
self.risk_statistics = dict() # 阿尔法 贝塔 R平方值
# 处理基金详情页跳转
def login_morning_star(self, cookie_str=None):
    """Ensure the Selenium driver holds a Morningstar session and cache its cookies.

    Two ways of signing in are supported (per the original note):
      1. install the cookies of an already signed-in session (``cookie_str``);
      2. sign in with account, password and captcha through ``login_site``
         (captcha recognition is reported to succeed only about 30% of the
         time; retries are supported).

    Args:
        cookie_str: Optional cookie string handed to ``set_cookies`` together
            with the sign-in URL. When falsy, the method either signs in via
            ``login_site`` (no cookies cached yet) or just re-reads the
            cookies from the driver.

    Raises:
        SystemExit: If ``login_site`` returns a falsy status.
    """
    import sys

    login_url = 'https://www.morningstar.cn/membership/signin.aspx'

    if self.chrome_driver is None:
        # Imported lazily so selenium is only required when the caller did
        # not supply a driver.
        from selenium import webdriver
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("--no-sandbox")
        self.chrome_driver = webdriver.Chrome(options=chrome_options)
        self.chrome_driver.set_page_load_timeout(12000)

    # NOTE(review): the sign-in logic below is assumed to run whether or not
    # the driver was just created; confirm against the intended flow.
    if cookie_str:
        set_cookies(self.chrome_driver, login_url, cookie_str)
        return

    if self.morning_cookies is None:
        login_status = login_site(self.chrome_driver, login_url)
        if not login_status:
            print('login fail')
            sys.exit()
        print('login success')
        # Presumably gives the site time to settle after signing in.
        sleep(3)

    # Cache the session cookies on the instance. Previously the cookies read
    # right after a successful login were bound to a local variable and lost,
    # so ``self.morning_cookies`` stayed ``None`` and every call logged in
    # again.
    self.morning_cookies = self.chrome_driver.get_cookies()
# Update the fund info by scraping the Morningstar site with selenium
def go_fund_url(self, cookie_str=None):
self.login_morning_star(cookie_str)
morning_fund_selector_url = "https://www.morningstar.cn/quicktake/" + \

Loading…
Cancel
Save