From 9bf86a3d4e1dd7754503a1b9f772035698b712af Mon Sep 17 00:00:00 2001
From: jackluson <18219112108@163.com>
Date: Thu, 28 Oct 2021 23:43:09 +0800
Subject: [PATCH] feat: 2021-q3

---
 log/crawler.log              |  0
 main.py                      | 36 ++++++++++++++++++++++++++++++++++++
 src/__init__.py              |  0
 src/acquire_fund_snapshot.py | 18 ++++++++++++------
 src/db/connect.py            |  3 ++-
 5 files changed, 50 insertions(+), 7 deletions(-)
 create mode 100644 log/crawler.log
 create mode 100644 main.py
 create mode 100644 src/__init__.py

diff --git a/log/crawler.log b/log/crawler.log
new file mode 100644
index 0000000..e69de29
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..44f4f0c
--- /dev/null
+++ b/main.py
@@ -0,0 +1,36 @@
+'''
+Desc:
+File: /main.py
+Project: fund-morning-star-crawler
+File Created: Thursday, 28th October 2021 10:51:07 pm
+Author: luxuemin2108@gmail.com
+-----
+Copyright (c) 2021 Camel Lu
+'''
+
+import logging
+import os
+import sys
+
+sys.path.append('./src')
+
+from src.acquire_fund_snapshot import get_fund_list
+
+
+def main():
+    input_value = input("请输入下列序号执行操作:\n \
+        1.“快照” \n \
+        2.“行业个股”\n \
+        3.“股票日更”\n \
+        4.“个股+日更”\n \
+        5.“财务指标”\n \
+        6.“A股估值”\n \
+        输入:")
+    if input_value == '1' or input_value == '快照':
+        get_fund_list()  # crawl the fund snapshot list and store it
+
+
+if __name__ == '__main__':
+    logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s',
+                        filename='log/crawler.log', filemode='a', level=logging.INFO)
+    main()
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/acquire_fund_snapshot.py b/src/acquire_fund_snapshot.py
index 458d4e8..f5f0bc1 100644
--- a/src/acquire_fund_snapshot.py
+++ b/src/acquire_fund_snapshot.py
@@ -8,17 +8,22 @@ Author: luxuemin2108@gmail.com
 Copyright (c) 2020 Camel Lu
 '''
 
-import re
 import math
 import os
+import re
+import sys
+
+sys.path.append(os.getcwd() + '/src')
+
 from time import sleep
-from bs4 import BeautifulSoup
 import pandas as pd
+from bs4 import BeautifulSoup
 from selenium.webdriver.support.ui import WebDriverWait
+
+from db.connect import connect
 from lib.mysnowflake import IdWorker
-from utils.login import login_morning_star
 from utils.index import get_star_count
-from db.connect import connect
+from utils.login import login_morning_star
 
 connect_instance = connect()
 cursor = connect_instance.cursor()
@@ -55,8 +60,8 @@ def get_fund_list():
     morning_fund_selector_url = "https://www.morningstar.cn/fundselect/default.aspx"
     chrome_driver = login_morning_star(morning_fund_selector_url, False)
     # Define the starting page number
-    page_num = 443
-    page_count = 25
+    page_num = 9
+    page_count = 25  # Morningstar's fixed page size
     page_num_total = math.ceil(int(chrome_driver.find_element_by_xpath(
         '/html/body/form/div[8]/div/div[4]/div[3]/div[2]/span').text) / page_count)

@@ -138,6 +143,7 @@ def get_fund_list():
         cursor.executemany(sql_insert, fund_list)
         connect_instance.commit()
         # print('fund_list', fund_list)
+        # write the results to a CSV file
         with open(result_dir + output_file_name, 'a') as csv_file:
             for fund_item in fund_list:
                 output_line = ', '.join(str(x) for x in fund_item) + '\n'
diff --git a/src/db/connect.py b/src/db/connect.py
index 18b3cae..fde5e88 100644
--- a/src/db/connect.py
+++ b/src/db/connect.py
@@ -1,6 +1,7 @@
-import pymysql
 import os
+
+import pymysql
 from dotenv import load_dotenv
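
Usage note (editor's sketch, not part of the committed patch): this commit routes the crawl through the new main.py menu instead of running src/acquire_fund_snapshot.py directly. Assuming the repository root is the working directory, a .env file with the MySQL credentials loaded by src/db/connect.py, and a ChromeDriver available for utils/login.py, a non-interactive run of the option-1 path would look roughly like the sketch below, which mirrors what main.py does when "1" is entered.

    # Minimal sketch mirroring the option-1 branch of main.py (assumptions above).
    import logging
    import sys

    sys.path.append('./src')  # so the db/, lib/ and utils/ imports inside src resolve

    from src.acquire_fund_snapshot import get_fund_list

    # Same logging setup as main.py: append progress and errors to log/crawler.log.
    logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s',
                        filename='log/crawler.log', filemode='a', level=logging.INFO)

    # Pages through the Morningstar fund selector, writing to MySQL and the CSV file.
    get_fund_list()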