@ -7,7 +7,7 @@ Author: luxuemin2108@gmail.com
- - - - -
Copyright ( c ) 2020 Camel Lu
'''
import os
import math
from utils import parse_cookiestr , set_cookies , login_site
from fund_info_crawler import FundSpider
@ -15,7 +15,8 @@ from lib.mysnowflake import IdWorker
import pymysql
from db . connect import connect
cursor = connect . cursor ( )
connect_instance = connect ( )
cursor = connect_instance . cursor ( )
def login ( ) :
@ -36,22 +37,26 @@ def login():
if __name__ == ' __main__ ' :
chrome_driver = login ( )
morning_cookies = chrome_driver . get_cookies ( )
# 获取数据库的基金列表
sql_count = " SELECT count(*) FROM fund_morning_snapshot WHERE fund_code IS NOT NULL AND morning_star_code IS NOT NULL "
env_snapshot_table_name = os . getenv ( ' snapshot_table_name ' )
sql_count = " SELECT count(*) FROM " + env_snapshot_table_name + \
" WHERE fund_code NOT IN (SELECT fund_code FROM fund_morning_base); "
cursor . execute ( sql_count )
count = cursor . fetchone ( ) # 获取记录条数
print ( ' count ' , count [ 0 ] )
chrome_driver = login ( )
morning_cookies = chrome_driver . get_cookies ( )
IdWorker = IdWorker ( )
page_limit = 10
record_total = count [ 0 ]
page_start = 0
error_funds = [ ' 005086 ' ] # 一些异常的基金详情页, 如果发现记录该基金的code
error_funds = [ ] # 一些异常的基金详情页, 如果发现记录该基金的code
# 遍历从基金列表的单支基金
while ( page_start < record_total ) :
sql = " SELECT fund_code, morning_star_code, fund_name FROM fund_morning_snapshot WHERE fund_code IS NOT NULL AND morning_star_code IS NOT NULL ORDER BY fund_code LIMIT %s , %s "
# 从fund_morning_snapshot_2021_q1 查出 fund_morning_base 中不存在的基金
sql = " SELECT fund_code, morning_star_code, fund_name FROM " + env_snapshot_table_name + \
" WHERE fund_code NOT IN (SELECT fund_code FROM fund_morning_base) ORDER BY fund_code LIMIT %s , %s "
cursor . execute (
sql , [ page_start , page_limit ] ) # 执行sql语句
results = cursor . fetchall ( ) # 获取查询的所有记录
@ -64,6 +69,7 @@ if __name__ == '__main__':
error_funds . append ( each_fund . fund_code )
continue
each_fund . get_fund_base_info ( )
# 去掉没有成立时间的
if each_fund . found_date == ' - ' :
error_funds . append ( each_fund . fund_code )
continue
@ -86,14 +92,14 @@ if __name__ == '__main__':
continue
update_values = update_values + ' {0} =VALUES( {0} ), ' . format ( key )
# 入库,不存在则创建,存在则更新
base_sql_insert = " INSERT INTO {table} ( {keys} ) VALUES ( {values} ) ON DUPLICATE KEY UPDATE {update_values} ; " . format (
base_sql_insert = " INSERT INTO {table} ( {keys} ) VALUES ( {values} ) ON DUPLICATE KEY UPDATE {update_values} ; " . format (
table = ' fund_morning_base ' ,
keys = keys ,
values = values ,
update_values = update_values [ 0 : - 1 ]
)
cursor . execute ( base_sql_insert , tuple ( base_dict . values ( ) ) )
connect . commit ( )
connect _instance . commit ( )
page_start = page_start + page_limit
print ( ' page_start ' , page_start )
chrome_driver . close ( )