fix: 🐛optimize query

main
jackluson 3 years ago
parent be21338725
commit 62bb072376

@ -91,7 +91,7 @@ if __name__ == '__main__':
# 开始爬取数据
quarter_index = each_fund.get_quarter_index() # 数据更新时间,如果不一致,不爬取下面数据
if quarter_index != each_fund.quarter_index:
print('quarter_index', quarter_index)
print('quarter_index', quarter_index, each_fund.update_date)
continue
each_fund.get_fund_season_info() # 基本季度性数据
@ -107,6 +107,8 @@ if __name__ == '__main__':
each_fund.fund_name, record[3],
each_fund.stock_position['total'],
page_start, each_fund._catch_detail]
output_line = ', '.join(str(x)
for x in fund_infos) + '\n'
fund_csv.write_season_catch_fund(False, output_line)
# 入库
lock.acquire()
@ -123,7 +125,6 @@ if __name__ == '__main__':
'brife': each_fund.manager.get('brife')
}
fund_insert.insert_fund_manger_info(manager_dict)
# 季度信息 TODO: 对比数据更新时间field
quarterly_dict = {
'id': snow_flake_id,
'quarter_index': each_fund.quarter_index,
@ -205,12 +206,12 @@ if __name__ == '__main__':
"end": 1500
}, {
"start": 1500,
"end": 3000
"end": 2500
}, {
"start": 3000,
"end": 4500
"start": 2500,
"end": 3500
}, {
"start": 4500,
"start": 3500,
"end": record_total
}]
for i in range(4):

@ -67,7 +67,6 @@ class FundSpider:
sleep(9)
# self._chrome_driver.execute_script('location.reload()')
#TODO: 选择元素相关抽离到一个专门类中
def get_element_text_by_class_name(self, class_name, parent_id):
try:
text = self._chrome_driver.find_element_by_id(
@ -77,7 +76,7 @@ class FundSpider:
self._is_trigger_catch = True
self._catch_detail = parent_id + '-' + class_name
print('error_fund_info:', self.fund_code,
'-', self.morning_star_code, self.stock_position["total"])
'-', self.morning_star_code, self.stock_position["total"], class_name)
file_name = './abnormal/' + self.fund_code + \
'-' + parent_id + "-no_such_element.png"
# self._chrome_driver.save_screenshot(file_name)
@ -94,7 +93,7 @@ class FundSpider:
self._is_trigger_catch = True
self._catch_detail = id
print('error_fund_info:', self.fund_code,
'-', self.morning_star_code, self.stock_position["total"])
'-', self.morning_star_code, self.stock_position["total"], id)
file_name = './abnormal/' + '-' + id + self.fund_code + "-no_such_element.png"
# self._chrome_driver.save_screenshot(file_name)
# driver.get_screenshot_as_file(file_name)
@ -114,7 +113,7 @@ class FundSpider:
self._is_trigger_catch = True
self._catch_detail = xpath
print('error_fund_info:', self.fund_code,
'-', self.morning_star_code, self.stock_position["total"])
'-', self.morning_star_code, self.stock_position["total"], xpath)
file_name = './abnormal/' + \
self.fund_code + '-' + xpath + "-no_such_element.png"
# self._chrome_driver.save_screenshot(file_name)
@ -154,7 +153,7 @@ class FundSpider:
except NoSuchElementException:
self._is_trigger_catch = True
print('error_fund_info:', self.fund_code,
'-', self.morning_star_code)
'-', self.morning_star_code, 'get_fund_manager_info')
file_name = './abnormal/manager-' + self.fund_code + "-no_such_element.png"
# self._chrome_driver.save_screenshot(file_name)
# driver.get_screenshot_as_file(file_name)
@ -179,7 +178,7 @@ class FundSpider:
except NoSuchElementException:
self._is_trigger_catch = True
print('error_fund_info:', self.fund_code,
'-', self.morning_star_code)
'-', self.morning_star_code, 'get_fund_morning_rating')
file_name = './abnormal/morning_rating-' + \
self.fund_code + "-no_such_element.png"
# 风险评级
@ -211,7 +210,7 @@ class FundSpider:
except NoSuchElementException:
self._is_trigger_catch = True
print('error_fund_info:', self.fund_code,
'-', self.morning_star_code)
'-', self.morning_star_code, 'get_fund_qt_rating')
file_name = './abnormal/qt_rating-' + self.fund_code + "-no_such_element.png"
def get_fund_season_info(self):
@ -259,7 +258,6 @@ class FundSpider:
# 获取标准差
# standard_deviation = self._chrome_driver.find_element_by_id(
# "qt_risk").find_element_by_xpath('li[16]').text
# TODO: 增加一个数据更新时间field
standard_deviation = self.get_element_text_by_xpath(
'li[16]', 'qt_risk')
if standard_deviation != None:
@ -303,7 +301,6 @@ class FundSpider:
self.ten_top_stock_list.append(temp_stock_info)
def get_quarter_index(self):
# 总资产 TODO: 增加一个数据更新时间field
update_date = self.get_element_text_by_class_name(
"date4", 'aspnetForm')
if(update_date == None):

@ -14,6 +14,7 @@ from db.connect import connect
class FundQuery:
def __init__(self):
self.quarter_index = '2021-Q1'
connect_instance = connect()
self.connect_instance = connect_instance
self.cursor = connect_instance.cursor()
@ -22,15 +23,16 @@ class FundQuery:
# 需要爬取季度性信息的基金(B,C类基金除外因为B、C基金大部分信息与A类一致)
def get_crawler_quarter_fund_total(self):
# 过滤没有股票持仓的基金
sql_count = "SELECT COUNT(1) FROM fund_morning_base \
LEFT JOIN fund_morning_snapshot ON fund_morning_snapshot.fund_code = fund_morning_base.fund_code \
WHERE fund_morning_base.fund_cat NOT LIKE '%%货币%%' \
AND fund_morning_base.fund_cat NOT LIKE '%%纯债基金%%' \
AND fund_morning_base.fund_cat NOT LIKE '目标日期' \
AND fund_morning_base.fund_name NOT LIKE '%%C' \
AND fund_morning_base.fund_name NOT LIKE '%%B' \
AND fund_morning_base.fund_cat NOT LIKE '%%短债基金%%'"
self.cursor.execute(sql_count)
sql_count = "SELECT COUNT(1) FROM fund_morning_base as a \
WHERE a.fund_cat NOT LIKE '%%货币%%' \
AND a.fund_cat NOT LIKE '%%纯债基金%%' \
AND a.fund_cat NOT LIKE '目标日期' \
AND a.fund_name NOT LIKE '%%C' \
AND a.fund_name NOT LIKE '%%B' \
AND a.fund_cat NOT LIKE '%%短债基金%%' \
AND a.fund_code NOT IN( SELECT fund_code FROM fund_morning_quarter as b \
WHERE b.quarter_index = %s);"
self.cursor.execute(sql_count, [self.quarter_index])
count = self.cursor.fetchone()
return count[0]
@ -39,18 +41,17 @@ class FundQuery:
sql = "SELECT t.fund_code,\
t.morning_star_code, t.fund_name, t.fund_cat \
FROM fund_morning_base as t \
LEFT JOIN fund_morning_snapshot as f ON f.fund_code = t.fund_code \
WHERE t.fund_cat NOT LIKE '%%货币%%' \
AND t.fund_cat NOT LIKE '%%纯债基金%%' \
AND t.fund_cat NOT LIKE '目标日期' \
AND t.fund_cat NOT LIKE '%%短债基金%%' \
AND t.fund_name NOT LIKE '%%C' \
AND t.fund_name NOT LIKE '%%B' \
ORDER BY f.fund_rating_5 DESC,f.fund_rating_3 DESC, \
t.fund_cat, t.fund_code LIMIT %s, %s"
AND t.fund_code NOT IN( SELECT fund_code FROM fund_morning_quarter as b \
WHERE b.quarter_index = %s) LIMIT %s, %s;"
self.lock.acquire()
self.cursor.execute(
sql, [page_start, page_limit]) # 执行sql语句
sql, [self.quarter_index, page_start, page_limit]) # 执行sql语句
results = self.cursor.fetchall() # 获取查询的所有记录
self.lock.release()
return results

Loading…
Cancel
Save