wip: 💄refactor sql code

main
jackluson 3 years ago
parent 8e3cff4068
commit 660570934e

@ -11,13 +11,14 @@ Copyright (c) 2020 Camel Lu
import math
from threading import Thread, Lock, current_thread
from utils import parse_cookiestr, set_cookies, login_site
from fund_info_crawler import FundSpider
from db.connect import connect
from lib.mysnowflake import IdWorker
from time import sleep, time
from pprint import pprint
import pandas
from db.connect import connect
from fund_info_crawler import FundSpider
from lib.mysnowflake import IdWorker
from utils import parse_cookiestr, set_cookies, login_site
from sql_model.fund_query import FundQuery
connect_instance = connect()
cursor = connect_instance.cursor()
@ -63,21 +64,12 @@ def generate_insert_sql(target_dict, table_name, ignore_list):
if __name__ == '__main__':
# 过滤没有股票持仓的基金
sql_count = "SELECT COUNT(1) FROM fund_morning_base \
LEFT JOIN fund_morning_snapshot ON fund_morning_snapshot.fund_code = fund_morning_base.fund_code \
WHERE fund_morning_base.fund_cat NOT LIKE '%%货币%%' \
AND fund_morning_base.fund_cat NOT LIKE '%%纯债基金%%' \
AND fund_morning_base.fund_cat NOT LIKE '目标日期' \
AND fund_morning_base.fund_name NOT LIKE '%%C' \
AND fund_morning_base.fund_name NOT LIKE '%%B' \
AND fund_morning_base.fund_cat NOT LIKE '%%短债基金%%'"
cursor.execute(sql_count)
count = cursor.fetchone() # 获取记录条数
print('count', count[0])
fund_query = FundQuery()
record_total = fund_query.get_crawler_quarter_total() # 获取记录条数
IdWorker = IdWorker()
page_limit = 5
record_total = count[0]
page_start = 0
error_funds = []
output_catch_head = '代码' + ',' + '晨星专属号' + ',' + '名称' + ',' + \
@ -285,19 +277,19 @@ if __name__ == '__main__':
# "start": 8300,
# "end": record_total
# }]
for i in range(1):
skip_num = 100
# print(i * step_num + skip_num, (i+1) * step_num)
# start = steps[i]['start']
# end = steps[i]['end']
start = i * step_num
end = (i + 1) * step_num
t = Thread(target=crawlData, args=(start, end))
t.setDaemon(True)
threaders.append(t)
t.start()
for threader in threaders:
threader.join()
# for i in range(1):
# skip_num = 100
# # print(i * step_num + skip_num, (i+1) * step_num)
# # start = steps[i]['start']
# # end = steps[i]['end']
# start = i * step_num
# end = (i + 1) * step_num
# t = Thread(target=crawlData, args=(start, end))
# t.setDaemon(True)
# threaders.append(t)
# t.start()
# for threader in threaders:
# threader.join()
stop = time()
print('run time is %s' % (stop - start))
print('error_funds', error_funds)

@ -0,0 +1,35 @@
'''
Desc: 基金查询sql类
File: /fund_query.py
Project: sql-model
File Created: Friday, 7th May 2021 11:58:59 pm
Author: luxuemin2108@gmail.com
-----
Copyright (c) 2021 Camel Lu
'''
from threading import Lock
from db.connect import connect
connect_instance = connect()
class FundQuery:
    """SQL query helper for the fund tables.

    Each instance owns one cursor obtained from the module-level
    ``connect_instance`` and a lock that serializes use of that cursor.
    """

    def __init__(self):
        # Cursor is taken from the connection created at module import time.
        self.cursor = connect_instance.cursor()
        # Guards self.cursor: an execute() followed by fetchone() must not be
        # interleaved with another query issued on the same cursor.
        self.lock = Lock()

    def get_crawler_quarter_total(self):
        """Return the number of funds whose quarterly info should be crawled.

        Counts rows of ``fund_morning_base`` (left-joined with
        ``fund_morning_snapshot`` on ``fund_code``), filtering out funds that
        hold no stocks: categories matching money-market (货币), pure-bond
        (纯债基金) or short-bond (短债基金), and the category equal to
        target-date (目标日期; this pattern has no wildcard, so it is an
        exact match). Funds whose name ends in ``B`` or ``C`` are excluded
        as well, because most of their information is the same as the
        corresponding class A fund.

        Returns:
            The first column of the single row produced by the COUNT query.
        """
        # NOTE(review): the doubled '%%' is preserved from the original text.
        # No parameters are passed to execute(), so the driver presumably
        # sends the text unformatted and MySQL reads '%%' as two consecutive
        # wildcards (same effect as '%') -- confirm against the driver in use.
        sql_count = (
            "SELECT COUNT(1) FROM fund_morning_base "
            "LEFT JOIN fund_morning_snapshot ON fund_morning_snapshot.fund_code = fund_morning_base.fund_code "
            "WHERE fund_morning_base.fund_cat NOT LIKE '%%货币%%' "
            "AND fund_morning_base.fund_cat NOT LIKE '%%纯债基金%%' "
            "AND fund_morning_base.fund_cat NOT LIKE '目标日期' "
            "AND fund_morning_base.fund_name NOT LIKE '%%C' "
            "AND fund_morning_base.fund_name NOT LIKE '%%B' "
            "AND fund_morning_base.fund_cat NOT LIKE '%%短债基金%%'"
        )
        # Hold the lock across execute + fetchone so the fetched row is
        # guaranteed to belong to this query.
        with self.lock:
            self.cursor.execute(sql_count)
            count = self.cursor.fetchone()
        return count[0]
Loading…
Cancel
Save