feat: add fund statistic

main
jackluson 4 years ago
parent eafe742e32
commit 781268a790

@ -35,7 +35,7 @@
#### 5. 晨星基金经理
> 爬取基金详情页的数据,据此爬取基金经理数据
<img src="./screenshot/fund_manager.png" style="zoom:50%;" />
> <img src="./screenshot/fund_manager.png" style="zoom:50%;" />
### 技术点
@ -64,4 +64,12 @@
4. 如何保证循环当前页与浏览器当前页一致
5. 多线程爬取时,线程锁
以上问题,我都做了相对应的处理,如果有问题的话,欢迎提 issue私聊star。
以上问题,我都做了相对应的处理。
### 数据汇总
基于上面的数据,简单做了如下数据汇总,统计股票在这些基金中出现的频率,可用于投资理财辅助,如图:
<img src="./screenshot/fund_statistic.png" />
如果有问题或感兴趣,欢迎提 issue、私聊或 star。

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

@ -0,0 +1,57 @@
# -*- coding:UTF-8 -*-
'''
Desc: 从基金的持仓中统计股票出现频率
File: /index.py
Project: src
File Created: Monday, 22nd March 2021 12:08:36 am
Author: luxuemin2108@gmail.com
-----
Copyright (c) 2020 Camel Lu
'''
import pymysql
from pprint import pprint
# Module-level MySQL session: connects to the `fund_work` database on
# 127.0.0.1 and opens the single cursor used for the queries in this file.
connect = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='xxx',
    db='fund_work',
    charset='utf8',
)
cursor = connect.cursor()
# Season whose holdings are analysed; bound as a query parameter below.
SEASON_NUMBER = '2021-s1'


def build_season_query(stock_count=10):
    """Return the SELECT statement that reads each fund's top holdings.

    For every slot in ``range(stock_count)`` the statement selects the
    ``top_stock_<slot>_code`` / ``top_stock_<slot>_name`` column pair,
    preceded by ``fund_code``.

    The returned SQL carries three ``%s`` placeholders that must be bound,
    in this order, to: season number, row offset, row limit.
    """
    columns = ",".join(
        "t.top_stock_{0}_code, t.top_stock_{0}_name".format(slot)
        for slot in range(stock_count)
    )
    return (
        "SELECT t.fund_code," + columns +
        " FROM fund_morning_stock_info as t"
        " WHERE t.season_number = %s AND t.stock_position_total > 20"
        " LIMIT %s, %s ;"
    )


def count_stock_frequency(rows):
    """Count in how many holding slots each stock appears across *rows*.

    Each row is a sequence whose first element is the fund code, followed
    by alternating (stock code, stock name) values. Slots whose stock code
    is ``None`` are skipped, so empty holdings never show up in the tally.

    Returns a ``Counter`` keyed by ``"<code>-<name>"``.
    """
    from collections import Counter

    frequency = Counter()
    for row in rows:
        holdings = row[1:]  # drop the leading fund code
        for code, name in zip(holdings[0::2], holdings[1::2]):
            if code is None:
                continue
            frequency[str(code) + '-' + str(name)] += 1
    return frequency


def filter_frequent_stocks(frequency, min_count=100):
    """Return ``(key, count)`` pairs with ``count > min_count``.

    Pairs are ordered by count, highest first; ties keep the order in
    which the keys were first counted (the sort is stable).
    """
    frequent = [(key, count) for key, count in frequency.items()
                if count > min_count]
    frequent.sort(key=lambda item: item[1], reverse=True)
    return frequent


if __name__ == '__main__':
    print('login')
    page_start = 0
    page_limit = 10000
    try:
        cursor.execute(build_season_query(),
                       [SEASON_NUMBER, page_start, page_limit])
        results = cursor.fetchall()
    finally:
        # Release the database resources even if the query fails.
        cursor.close()
        connect.close()
    pprint(filter_frequent_stocks(count_stock_frequency(results)))

@ -18,7 +18,7 @@ from selenium.common.exceptions import NoSuchElementException
class FundSpider:
# 初始化定义,利用基金代码、基金名称进行唯一化
def __init__(self, code, namecode, name, chrome_driver, morning_cookies):
self.season_number = '2021-1s'
self.season_number = '2021-s1'
self.fund_code = code # 基金代码,需要初始化赋值
self.fund_name = name # 基金名称,需要初始化赋值
self.morning_star_code = namecode # 基金编码,晨星网特有,需要建立索引表

@ -0,0 +1,57 @@
# -*- coding:UTF-8 -*-
'''
Desc: 从基金的持仓中统计股票出现频率
File: /index.py
Project: src
File Created: Monday, 22nd March 2021 12:08:36 am
Author: luxuemin2108@gmail.com
-----
Copyright (c) 2020 Camel Lu
'''
import pymysql
from pprint import pprint
# Module-level MySQL session: connects to the `fund_work` database on
# 127.0.0.1 and opens the single cursor used for the queries in this file.
connect = pymysql.connect(
    host='127.0.0.1',
    user='root',
    password='xxx',
    db='fund_work',
    charset='utf8',
)
cursor = connect.cursor()
# Season whose holdings are analysed; bound as a query parameter below.
SEASON_NUMBER = '2021-s1'


def build_season_query(stock_count=10):
    """Return the SELECT statement that reads each fund's top holdings.

    For every slot in ``range(stock_count)`` the statement selects the
    ``top_stock_<slot>_code`` / ``top_stock_<slot>_name`` column pair,
    preceded by ``fund_code``.

    The returned SQL carries three ``%s`` placeholders that must be bound,
    in this order, to: season number, row offset, row limit.
    """
    columns = ",".join(
        "t.top_stock_{0}_code, t.top_stock_{0}_name".format(slot)
        for slot in range(stock_count)
    )
    return (
        "SELECT t.fund_code," + columns +
        " FROM fund_morning_stock_info as t"
        " WHERE t.season_number = %s AND t.stock_position_total > 20"
        " LIMIT %s, %s ;"
    )


def count_stock_frequency(rows):
    """Count in how many holding slots each stock appears across *rows*.

    Each row is a sequence whose first element is the fund code, followed
    by alternating (stock code, stock name) values. Slots whose stock code
    is ``None`` are skipped, so empty holdings never show up in the tally.

    Returns a ``Counter`` keyed by ``"<code>-<name>"``.
    """
    from collections import Counter

    frequency = Counter()
    for row in rows:
        holdings = row[1:]  # drop the leading fund code
        for code, name in zip(holdings[0::2], holdings[1::2]):
            if code is None:
                continue
            frequency[str(code) + '-' + str(name)] += 1
    return frequency


def filter_frequent_stocks(frequency, min_count=100):
    """Return ``(key, count)`` pairs with ``count > min_count``.

    Pairs are ordered by count, highest first; ties keep the order in
    which the keys were first counted (the sort is stable).
    """
    frequent = [(key, count) for key, count in frequency.items()
                if count > min_count]
    frequent.sort(key=lambda item: item[1], reverse=True)
    return frequent


if __name__ == '__main__':
    print('login')
    page_start = 0
    page_limit = 10000
    try:
        cursor.execute(build_season_query(),
                       [SEASON_NUMBER, page_start, page_limit])
        results = cursor.fetchall()
    finally:
        # Release the database resources even if the query fails.
        cursor.close()
        connect.close()
    pprint(filter_frequent_stocks(count_stock_frequency(results)))

@ -101,7 +101,7 @@ def login_site(chrome_driver, site_url, redirect_url=None):
password = chrome_driver.find_element_by_id('pwdValue')
check_code = chrome_driver.find_element_by_id('txtCheckCode')
username.send_keys('18219112108@163.com')
password.send_keys('w780880')
password.send_keys('xxxx')
count = 1
flag = True
while count < 10 and flag:

Loading…
Cancel
Save