feat: add base to main

main
jackluson 3 years ago
parent 9bf86a3d4e
commit 475941b6de

@@ -9,25 +9,23 @@ Copyright (c) 2021 Camel Lu
 '''
 import logging
 import os
 import sys

 sys.path.append('./src')
 from src.acquire_fund_snapshot import get_fund_list
 from src.acquire_fund_base import acquire_fund_base


 def main():
     input_value = input("Enter one of the numbers below to run an action:\n \
         1. Snapshot \n \
-        2. Industry stocks\n \
-        3. Daily stock updates\n \
-        4. Stocks + daily updates\n \
-        5. Financial indicators\n \
-        6. A-share valuation\n \
+        2. New fund import\n \
         Input: ")
     if input_value == '1' or input_value == 'snapshot':
-        get_fund_list()  # fetch the fund snapshot list
+        page_index = 1
+        get_fund_list(page_index)  # fetch the fund snapshot list
+    elif input_value == '2' or input_value == 'new fund import':
+        acquire_fund_base()  # import base info for newly listed funds

 if __name__ == '__main__':
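
A side effect of the new entry point taking page_index as an argument (instead of the hard-coded page_num = 9 this commit removes from the snapshot crawler below) is that an interrupted snapshot crawl can be resumed from an arbitrary page. A minimal sketch, assuming get_fund_list(page_index) crawls from the given page through the last one:

    import sys
    sys.path.append('./src')
    from src.acquire_fund_snapshot import get_fund_list

    # hypothetical resume call, not part of this commit:
    get_fund_list(42)  # pick a crashed crawl back up at page 42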

File diff suppressed because it is too large.

@@ -7,7 +7,6 @@ Author: luxuemin2108@gmail.com
 -----
 Copyright (c) 2020 Camel Lu
 '''
-from time import sleep
 from threading import Lock

 from utils.login import login_morning_star
 from utils.index import bootstrap_thread
@@ -16,21 +15,20 @@ from lib.mysnowflake import IdWorker
 from sql_model.fund_query import FundQuery
 from sql_model.fund_insert import FundInsert


-if __name__ == '__main__':
+def acquire_fund_base():
     lock = Lock()
     each_fund_query = FundQuery()
     each_fund_insert = FundInsert()
     record_total = each_fund_query.get_fund_count_from_snapshot_no_exist()  # number of records still to process
-    IdWorker = IdWorker()
+    idWorker = IdWorker()
     print('record_total', record_total)
     error_funds = []  # record a fund's code if its detail page turns out to be abnormal

     def crawlData(start, end):
         login_url = 'https://www.morningstar.cn/membership/signin.aspx'
-        chrome_driver = login_morning_star(login_url, True)
+        chrome_driver = login_morning_star(login_url, False)
         page_start = start
         page_limit = 10
         # iterate over each fund in the fund list
@@ -56,7 +54,7 @@ if __name__ == '__main__':
                 continue
             # assemble the data needed for the SQL insert
             lock.acquire()
-            snow_flake_id = IdWorker.get_id()
+            snow_flake_id = idWorker.get_id()
             lock.release()
             base_dict = {
                 'id': snow_flake_id,
@@ -71,5 +69,9 @@ if __name__ == '__main__':
             page_start = page_start + page_limit
             print('page_start', page_start)
         chrome_driver.close()
-    bootstrap_thread(crawlData, record_total, 2)
+    bootstrap_thread(crawlData, record_total, 4)
     print('error_funds', error_funds)
+
+
+if __name__ == '__main__':
+    acquire_fund_base()
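
bootstrap_thread itself is not shown in this diff. A plausible reading of bootstrap_thread(crawlData, record_total, 4) is that it slices the range [0, record_total) into four contiguous chunks and runs crawlData(start, end) for each chunk on its own thread; that would also explain why crawlData takes a (start, end) pair and why idWorker.get_id() is wrapped in a Lock (all four threads share one snowflake generator). A sketch of such a helper, as an assumption about utils/index.py rather than its actual contents:

    import math
    from threading import Thread

    def bootstrap_thread(worker, total, thread_count):
        # split [0, total) into thread_count contiguous chunks and
        # run worker(start, end) for each chunk on its own thread
        chunk = math.ceil(total / thread_count)
        threads = []
        for i in range(thread_count):
            start = i * chunk
            end = min(start + chunk, total)
            if start >= end:
                break  # fewer records than threads
            thread = Thread(target=worker, args=(start, end))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()  # return only after every chunk is processed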

@@ -22,7 +22,7 @@ from selenium.webdriver.support.ui import WebDriverWait
 from db.connect import connect
 from lib.mysnowflake import IdWorker
-from utils.index import get_star_count
+from utils.index import get_star_count, bootstrap_thread
 from utils.login import login_morning_star

 connect_instance = connect()
@@ -56,13 +56,12 @@ def text_to_be_present_in_element(locator, text, next_page_locator):
     return _predicate


-def get_fund_list():
+def get_fund_list(page_index):
     morning_fund_selector_url = "https://www.morningstar.cn/fundselect/default.aspx"
     chrome_driver = login_morning_star(morning_fund_selector_url, False)
     # starting page number
-    page_num = 9
     page_count = 25  # Morningstar's fixed page size
-    page_num_total = math.ceil(int(chrome_driver.find_element_by_xpath(
+    page_total = math.ceil(int(chrome_driver.find_element_by_xpath(
         '/html/body/form/div[8]/div/div[4]/div[3]/div[2]/span').text) / page_count)
     result_dir = './output/'
@@ -71,22 +70,22 @@ def get_fund_list():
     env_snapshot_table_name = os.getenv('snapshot_table_name')
     output_file_name = env_snapshot_table_name + ".csv"
     # write the header row
-    if page_num == 1:
+    if page_index == 1:
         with open(result_dir + output_file_name, 'w+') as csv_file:
             csv_file.write(output_head)
-    while page_num <= page_num_total:
+    while page_index <= page_total:
         # remainder of the total page count divided by 10
-        remainder = page_num_total % 10
+        remainder = page_total % 10
         # check whether we are in the last block of pages
         num = (remainder +
-               2) if page_num > (page_num_total - remainder) else 12
+               2) if page_index > (page_total - remainder) else 12
         xpath_str = '/html/body/form/div[8]/div/div[4]/div[3]/div[3]/div[1]/a[%s]' % (
             num)
-        print('page_num', page_num)
+        print('page_index', page_index)
         # wait until the pager's current-page indicator equals page_num
         WebDriverWait(chrome_driver, timeout=600).until(text_to_be_present_in_element(
-            "/html/body/form/div[8]/div/div[4]/div[3]/div[3]/div[1]/span[@style='margin-right:5px;font-weight:Bold;color:red;']", str(page_num), xpath_str))
+            "/html/body/form/div[8]/div/div[4]/div[3]/div[3]/div[1]/span[@style='margin-right:5px;font-weight:Bold;color:red;']", str(page_index), xpath_str))
         sleep(1)
         # lists to hold the scraped data
         id_list = []  # snowflake ids
@@ -155,11 +154,12 @@ def get_fund_list():
             # click the next-page link
             next_page.click()
             sleep(3)
-            page_num += 1
+            page_index += 1
-    chrome_driver.close()
     print('end')
+    # chrome_driver.close()


 if __name__ == "__main__":
-    fund_list = get_fund_list()
+    page_index = 1
+    fund_list = get_fund_list(page_index)
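
The num arithmetic in the loop above is the least obvious part of the snapshot crawler: Morningstar's pager renders a window of ten page links plus navigation anchors, and the XPath a[num] has to land on the next-page anchor, whose position shifts once the crawl reaches the final, partial window of pages. A worked example of the same expression, with hypothetical numbers:

    page_total = 137             # e.g. ceil(3414 funds / 25 per page)
    remainder = page_total % 10  # 7 pages left in the final pager window

    for page_index in (5, 131):
        # outside the last window the next-page anchor sits at the fixed
        # position 12; inside it only `remainder` page links remain, so
        # the anchor moves up to position remainder + 2
        num = (remainder +
               2) if page_index > (page_total - remainder) else 12
        print(page_index, '->', 'a[%s]' % num)
    # prints: 5 -> a[12]
    #         131 -> a[9]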
