refactor: abstract db config

main
jackluson 3 years ago
parent 8cc4ed4a1e
commit 5328792bb0

@@ -13,8 +13,8 @@ from utils import parse_cookiestr, set_cookies, login_site
from fund_info_crawler import FundSpider
from lib.mysnowflake import IdWorker
import pymysql
connect = pymysql.connect(host='127.0.0.1', user='root',
password='rootroot', db='fund_work', charset='utf8')
from db.connect import connect
cursor = connect.cursor()

@@ -13,13 +13,12 @@ import math
from threading import Thread, Lock, current_thread
from utils import parse_cookiestr, set_cookies, login_site
from fund_info_crawler import FundSpider
from db.connect import connect
from lib.mysnowflake import IdWorker
from time import sleep, time
from pprint import pprint
import pymysql
import pandas
connect = pymysql.connect(host='127.0.0.1', user='root',
password='xxxxx', db='fund_work', charset='utf8')
cursor = connect.cursor()
lock = Lock()
@@ -177,7 +176,7 @@ if __name__ == '__main__':
# quarterly info; TODO: compare against the data update time field
season_dict = {
'id': snow_flake_id,
'season_number': each_fund.season_number,
'quarter_index': each_fund.quarter_index,
'fund_code': each_fund.fund_code,
'investname_style': each_fund.investname_style,
'total_asset': each_fund.total_asset,
@@ -204,7 +203,7 @@ if __name__ == '__main__':
'morning_star_rating_10': each_fund.morning_star_rating.get(10),
}
season_sql_insert = generate_insert_sql(
season_dict, 'fund_morning_season', ['id', 'season_number', 'fund_code'])
season_dict, 'fund_morning_season', ['id', 'quarter_index', 'fund_code'])
lock.acquire()
cursor.execute(season_sql_insert,
tuple(season_dict.values()))
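Only the lock.acquire() is visible in this hunk; assuming lock is the module-level threading.Lock created in the hunk above and connect resolves to a live pymysql connection, the same insert is often wrapped in a context manager so the lock is released even if execute() raises. Illustrative only, not part of the commit:

# illustrative: context-managed locking around the shared cursor
with lock:
    cursor.execute(season_sql_insert, tuple(season_dict.values()))
    connect.commit()  # assumes `connect` is the shared pymysql connection
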
@@ -216,7 +215,7 @@ if __name__ == '__main__':
if float(stock_position_total) > 0:
stock_dict = {
'id': snow_flake_id,
'season_number': each_fund.season_number,
'quarter_index': each_fund.quarter_index,
'fund_code': each_fund.fund_code,
'stock_position_total': each_fund.stock_position.get('total'),
}
@@ -232,7 +231,7 @@ if __name__ == '__main__':
market_key = prefix + 'market'
stock_dict[market_key] = temp_stock['stock_market']
stock_sql_insert = generate_insert_sql(
stock_dict, 'fund_morning_stock_info', ['id', 'season_number', 'fund_code'])
stock_dict, 'fund_morning_stock_info', ['id', 'quarter_index', 'fund_code'])
lock.acquire()
# print('stock_sql_insert', stock_sql_insert)
cursor.execute(stock_sql_insert,

@@ -11,16 +11,14 @@ Copyright (c) 2020 Camel Lu
import re
import math
import os
import pymysql
from time import sleep
from bs4 import BeautifulSoup
import pandas as pd
from selenium.webdriver.support.ui import WebDriverWait
from lib.mysnowflake import IdWorker
from utils import parse_cookiestr, set_cookies, login_site, get_star_count
from db.connect import connect
connect = pymysql.connect(host='127.0.0.1', user='root',
password='rootroot', db='fund_work', charset='utf8')
cursor = connect.cursor()
'''
@@ -83,7 +81,7 @@ def get_fund_list(cookie_str=None):
chrome_driver.get(morning_fund_selector_url) # open the crawl page again
print(chrome_driver.get_cookies()) # print the cookies that were set
# define the starting page number
page_num = 1
page_num = 445
page_count = 25
page_num_total = math.ceil(int(chrome_driver.find_element_by_xpath(
'/html/body/form/div[8]/div/div[4]/div[3]/div[2]/span').text) / page_count)
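The start page jumping from 1 to 445 looks like a resume point for an interrupted crawl baked into the source. Since this commit already introduces dotenv-based configuration, the start page could be read the same way; crawl_start_page below is a hypothetical key, not one this commit defines:

import os
from dotenv import load_dotenv

load_dotenv()
# hypothetical .env key; falls back to page 1 when unset
page_num = int(os.getenv('crawl_start_page', '1'))
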
@@ -158,8 +156,8 @@ def get_fund_list(cookie_str=None):
fund_df = pd.DataFrame({'id': id_list, 'fund_code': code_list, 'morning_star_code': morning_star_code_list, 'fund_name': name_list, 'fund_cat': fund_cat,
'fund_rating_3': fund_rating_3, 'fund_rating_5': fund_rating_5, 'rate_of_return': rate_of_return})
env_snapshot_table_name = os.getenv('snapshot_table_name')
sql_insert = "replace into " + env_snapshot_table_name + \
"(`id`, `fund_code`,`morning_star_code`, `fund_name`, `fund_cat`, `fund_rating_3`, `fund_rating_5`, `rate_of_return`) values(%s, %s, %s, %s, %s, %s, %s, %s)"
sql_insert = "INSERT INTO " + env_snapshot_table_name + \
"(`id`, `fund_code`,`morning_star_code`, `fund_name`, `fund_cat`, `fund_rating_3`, `fund_rating_5`, `rate_of_return`) VALUES(%s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE fund_rating_3=VALUES(fund_rating_3), fund_rating_5=VALUES(fund_rating_5), rate_of_return=VALUES(rate_of_return);"
# print('fund_df', fund_df)
fund_list = fund_df.values.tolist()
cursor.executemany(sql_insert, fund_list)
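Replacing REPLACE INTO with INSERT ... ON DUPLICATE KEY UPDATE changes the write semantics: REPLACE deletes the conflicting row and inserts a fresh one (resetting any column not listed), while the upsert keeps the existing row and updates only the named columns. A minimal sketch of generating such a statement from a column list (a hypothetical helper, not the project's generate_insert_sql):

def build_upsert_sql(table, columns, update_columns):
    """Build an INSERT ... ON DUPLICATE KEY UPDATE statement with %s placeholders."""
    col_list = ', '.join('`{}`'.format(col) for col in columns)
    placeholders = ', '.join(['%s'] * len(columns))
    updates = ', '.join('{0}=VALUES({0})'.format(col) for col in update_columns)
    return 'INSERT INTO {} ({}) VALUES ({}) ON DUPLICATE KEY UPDATE {};'.format(
        table, col_list, placeholders, updates)

# e.g. build_upsert_sql(env_snapshot_table_name,
#                       ['id', 'fund_code', 'fund_rating_3', 'fund_rating_5', 'rate_of_return'],
#                       ['fund_rating_3', 'fund_rating_5', 'rate_of_return'])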

@@ -0,0 +1,20 @@
import pymysql
import os
from dotenv import load_dotenv


def connect():
    load_dotenv()
    env_db_host = os.getenv('db_host')
    env_db_name = os.getenv('db_name')
    env_db_user = os.getenv('db_user')
    print('env_db_user', env_db_user)
    env_db_password = os.getenv('db_password')
    connect = pymysql.connect(
        host=env_db_host, user=env_db_user, password=env_db_password, db=env_db_name, charset='utf8')
    return connect


if __name__ == '__main__':
    connect()
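The new helper reads db_host, db_name, db_user and db_password from a .env file via python-dotenv (the snapshot_table_name key used in the crawler above presumably lives in the same file). Note, though, that the call sites introduced in this commit do `from db.connect import connect` followed by `cursor = connect.cursor()`, which treats the imported name as a live connection rather than a factory function. A minimal sketch of a module-level variant that would match those call sites (an assumption about intent, not the committed code):

# db/connect.py variant matching `from db.connect import connect; connect.cursor()`
import os

import pymysql
from dotenv import load_dotenv

load_dotenv()
connect = pymysql.connect(
    host=os.getenv('db_host'),
    user=os.getenv('db_user'),
    password=os.getenv('db_password'),
    db=os.getenv('db_name'),
    charset='utf8',
)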

@@ -18,7 +18,7 @@ from selenium.common.exceptions import NoSuchElementException
class FundSpider:
# Init: a fund is uniquely identified by its fund code and fund name
def __init__(self, code, namecode, name, chrome_driver, morning_cookies):
self.season_number = '2021-s1' # TODO: get season_number by current time
self.quarter_index = '2021-q1' # TODO: get quarter_index by current time
self.fund_code = code # fund code, assigned at initialization
self.fund_name = name # fund name, assigned at initialization
self.morning_star_code = namecode # Morningstar-specific fund code; an index table is needed to map it
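The constructor still hard-codes '2021-q1' and leaves a TODO to derive quarter_index from the current date. One way to resolve it (an illustrative sketch; current_quarter_index is a hypothetical helper, not in the repository):

from datetime import date

def current_quarter_index(today=None):
    """Return a quarter label such as '2021-q1' for the given date."""
    today = today or date.today()
    quarter = (today.month - 1) // 3 + 1
    return '{}-q{}'.format(today.year, quarter)

# e.g. in FundSpider.__init__:
# self.quarter_index = current_quarter_index()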

@@ -11,12 +11,10 @@ Copyright (c) 2020 Camel Lu
import pymysql
from pprint import pprint
connect = pymysql.connect(host='127.0.0.1', user='root',
password='xxx', db='fund_work', charset='utf8')
cursor = connect.cursor()
from db.connect import connect
cursor = connect.cursor()
if __name__ == '__main__':
print('login')
page_start = 0
page_limit = 10000
stock_sql_join = ''
@@ -28,7 +26,7 @@ if __name__ == '__main__':
stock_sql_join = stock_sql_join[0:-1]
# print(stock_sql_join)
sql_query_season = "SELECT t.fund_code," + stock_sql_join + \
" FROM fund_morning_stock_info as t WHERE t.season_number = '2021-s1' AND t.stock_position_total > 20 LIMIT %s, %s ;"
" FROM fund_morning_stock_info as t WHERE t.quarter_index = '2020-q4' AND t.stock_position_total > 20 LIMIT %s, %s ;"
cursor.execute(sql_query_season, [page_start, page_limit]) # execute the SQL statement
results = cursor.fetchall() # fetch all records returned by the query
# pprint(results)
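The query now filters on t.quarter_index but still hard-codes '2020-q4'. Because pymysql passes the list as query parameters, the quarter can be bound the same way as the paging values (illustrative; reuses the hypothetical current_quarter_index helper sketched after the FundSpider hunk):

sql_query_season = "SELECT t.fund_code," + stock_sql_join + \
    " FROM fund_morning_stock_info as t WHERE t.quarter_index = %s AND t.stock_position_total > 20 LIMIT %s, %s;"
cursor.execute(sql_query_season, [current_quarter_index(), page_start, page_limit])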
