chore: rearrange code & directory

main
jackluson 4 years ago
parent 905b4362f4
commit 46af8a5da2

5
.gitignore vendored

@ -1,7 +1,12 @@
# Byte-compiled / optimized / DLL files
__pycache__/
code-record/*
!.gitkeep
*.py[cod]
*$py.class
.vscode/
*fund_morning_star.csv
# C extensions
*.so

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.5 KiB

@ -0,0 +1,57 @@
absl-py==0.11.0
astor==0.8.1
autopep8==1.5.4
beautifulsoup4==4.5.3
bs4==0.0.1
cached-property==1.5.2
certifi==2020.12.5
chardet==3.0.4
cycler==0.10.0
fake-useragent==0.1.11
gast==0.4.0
google-pasta==0.2.0
grpcio==1.34.0
h5py==3.1.0
idna==2.6
importlib-metadata==3.3.0
joblib==1.0.0
Keras==2.2.4
Keras-Applications==1.0.8
Keras-Preprocessing==1.1.2
kiwisolver==1.3.1
lxml==4.6.2
Markdown==3.3.3
matplotlib==3.3.3
ntplib==0.3.4
numpy==1.19.5
opencv-python==4.5.1.48
pandas==1.1.5
Pillow==8.1.0
protobuf==3.14.0
pycodestyle==2.6.0
PyMySQL==1.0.2
pyparsing==2.4.7
pysnowflake==0.1.3
pytesseract==0.3.7
python-dateutil==2.8.1
pytz==2020.5
PyYAML==5.3.1
requests==2.18.4
scikit-learn==0.24.0
scipy==1.6.0
selenium==3.11.0
six==1.15.0
sklearn==0.0
tensorboard==1.14.0
tensorflow==1.14.0
tensorflow-estimator==1.14.0
termcolor==1.1.0
threadpoolctl==2.1.0
toml==0.10.2
tornado==6.1
typing-extensions==3.7.4.3
urllib3==1.22
Werkzeug==1.0.1
wrapcache==1.0.8
wrapt==1.12.1
zipp==3.4.0

@ -95,7 +95,7 @@ def get_fund_list(cookie_str=None):
page_count = 25
page_num_total = math.ceil(int(chrome_driver.find_element_by_xpath(
'/html/body/form/div[8]/div/div[4]/div[3]/div[2]/span').text) / page_count)
# 爬取共306页
result_dir = './output/'
output_head = '代码' + ',' + '晨星专属号' + ',' + '名称' + ',' + \
'类型' + ',' + '三年评级' + ',' + '五年评级' + ',' + '今年回报率' + '\n'
@ -162,14 +162,13 @@ def get_fund_list(cookie_str=None):
rate_of_return.append(return_value)
print('数据准备完毕')
fund_df = pd.DataFrame({'fund_code': code_list, 'morning_star_code': morning_star_code_list, 'fund_name': name_list, 'fund_cat': fund_cat,
fund_df = pd.DataFrame({'id': id_list, 'fund_code': code_list, 'morning_star_code': morning_star_code_list, 'fund_name': name_list, 'fund_cat': fund_cat,
'fund_rating_3': fund_rating_3, 'fund_rating_5': fund_rating_5, 'rate_of_return': rate_of_return})
sql_insert = "replace into fund_morning_star(`id`, `fund_code`,`morning_star_code`, `fund_name`, `fund_cat`, `fund_rating_3`, `fund_rating_5`, `rate_of_return`) values(%s, %s, %s, %s, %s, %s, %s, %s)"
# print('fund_df', fund_df)
fund_list = fund_df.values.tolist()
# cursor.executemany(sql_insert, fund_list)
# connect.commit()
# sql_insert = "insert into fund_morning_star(`fund_code`, `fund_name`, `fund_cat`, `fund_rate_3`, `fund_rate_5`, `rate_of_return`) values(%s, %s, %s, %s, %s, %s)"
# ALTER TABLE fund_morning_star MODIFY COLUMN update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
print('fund_list', fund_list)
with open(result_dir + 'fund_morning_star.csv', 'a') as csv_file:
for fund_item in fund_list:

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

@ -41,7 +41,7 @@ def identify_verification_code(chrome_driver, id="checkcodeImg"):
directory_time = time.strftime("%Y-%m-%d", time.localtime(time.time()))
# 获取到当前文件的目录,并检查是否有 directory_time 文件夹,如果不存在则自动新建 directory_time 文件
try:
file_Path = os.getcwd() + '/' + directory_time + '/'
file_Path = os.getcwd() + '/code-record/' + directory_time + '/'
if not os.path.exists(file_Path):
os.makedirs(file_Path)
print("目录新建成功:%s" % file_Path)
@ -52,7 +52,7 @@ def identify_verification_code(chrome_driver, id="checkcodeImg"):
try:
from selenium.webdriver.common.by import By
ele = chrome_driver.find_element(By.ID, id)
code_path = './' + directory_time + '/' + picture_time + '_code.png'
code_path = './code-record/' + directory_time + '/' + picture_time + '_code.png'
url = ele.screenshot(code_path)
if url:
print("%s :截图成功!!!" % url)
@ -99,7 +99,7 @@ def login_site(chrome_driver, site_url):
password = chrome_driver.find_element_by_id('pwdValue')
check_code = chrome_driver.find_element_by_id('txtCheckCode')
username.send_keys('18219112108@163.com')
password.send_keys('w780880')
password.send_keys('xxxx')
count = 1
flag = True
while count < 10 and flag:

Loading…
Cancel
Save