diff --git a/.gitignore b/.gitignore index db60abb..6a766d2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,12 @@ # Byte-compiled / optimized / DLL files __pycache__/ +code-record/* +!.gitkeep *.py[cod] *$py.class +.vscode/ +*fund_morning_star.csv + # C extensions *.so diff --git a/2021-02-20/2021-02-20-20_35_27_code.png b/2021-02-20/2021-02-20-20_35_27_code.png deleted file mode 100644 index 2083060..0000000 Binary files a/2021-02-20/2021-02-20-20_35_27_code.png and /dev/null differ diff --git a/2021-02-20/2021-02-20-20_38_29_code.png b/2021-02-20/2021-02-20-20_38_29_code.png deleted file mode 100644 index 5a03968..0000000 Binary files a/2021-02-20/2021-02-20-20_38_29_code.png and /dev/null differ diff --git a/2021-02-20/2021-02-20-20_38_37_code.png b/2021-02-20/2021-02-20-20_38_37_code.png deleted file mode 100644 index 3f8ccac..0000000 Binary files a/2021-02-20/2021-02-20-20_38_37_code.png and /dev/null differ diff --git a/2021-02-20/2021-02-20-20_39_18_code.png b/2021-02-20/2021-02-20-20_39_18_code.png deleted file mode 100644 index 2edc4c8..0000000 Binary files a/2021-02-20/2021-02-20-20_39_18_code.png and /dev/null differ diff --git a/2021-02-20/2021-02-20-20_40_49_code.png b/2021-02-20/2021-02-20-20_40_49_code.png deleted file mode 100644 index b41197a..0000000 Binary files a/2021-02-20/2021-02-20-20_40_49_code.png and /dev/null differ diff --git a/2021-02-20/2021-02-20-20_40_58_code.png b/2021-02-20/2021-02-20-20_40_58_code.png deleted file mode 100644 index 225086d..0000000 Binary files a/2021-02-20/2021-02-20-20_40_58_code.png and /dev/null differ diff --git a/code-record/.gitkeep b/code-record/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e3a0a08 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,57 @@ +absl-py==0.11.0 +astor==0.8.1 +autopep8==1.5.4 +beautifulsoup4==4.5.3 +bs4==0.0.1 +cached-property==1.5.2 +certifi==2020.12.5 +chardet==3.0.4 +cycler==0.10.0 +fake-useragent==0.1.11 +gast==0.4.0 +google-pasta==0.2.0 +grpcio==1.34.0 +h5py==3.1.0 +idna==2.6 +importlib-metadata==3.3.0 +joblib==1.0.0 +Keras==2.2.4 +Keras-Applications==1.0.8 +Keras-Preprocessing==1.1.2 +kiwisolver==1.3.1 +lxml==4.6.2 +Markdown==3.3.3 +matplotlib==3.3.3 +ntplib==0.3.4 +numpy==1.19.5 +opencv-python==4.5.1.48 +pandas==1.1.5 +Pillow==8.1.0 +protobuf==3.14.0 +pycodestyle==2.6.0 +PyMySQL==1.0.2 +pyparsing==2.4.7 +pysnowflake==0.1.3 +pytesseract==0.3.7 +python-dateutil==2.8.1 +pytz==2020.5 +PyYAML==5.3.1 +requests==2.18.4 +scikit-learn==0.24.0 +scipy==1.6.0 +selenium==3.11.0 +six==1.15.0 +sklearn==0.0 +tensorboard==1.14.0 +tensorflow==1.14.0 +tensorflow-estimator==1.14.0 +termcolor==1.1.0 +threadpoolctl==2.1.0 +toml==0.10.2 +tornado==6.1 +typing-extensions==3.7.4.3 +urllib3==1.22 +Werkzeug==1.0.1 +wrapcache==1.0.8 +wrapt==1.12.1 +zipp==3.4.0 diff --git a/src/acquire_fund_list.py b/src/acquire_fund_list.py index 94b92c7..867a51b 100644 --- a/src/acquire_fund_list.py +++ b/src/acquire_fund_list.py @@ -95,7 +95,7 @@ def get_fund_list(cookie_str=None): page_count = 25 page_num_total = math.ceil(int(chrome_driver.find_element_by_xpath( '/html/body/form/div[8]/div/div[4]/div[3]/div[2]/span').text) / page_count) - # 爬取共306页 + result_dir = './output/' output_head = '代码' + ',' + '晨星专属号' + ',' + '名称' + ',' + \ '类型' + ',' + '三年评级' + ',' + '五年评级' + ',' + '今年回报率' + '\n' @@ -162,14 +162,13 @@ def get_fund_list(cookie_str=None): rate_of_return.append(return_value) print('数据准备完毕') - fund_df = pd.DataFrame({'fund_code': code_list, 'morning_star_code': morning_star_code_list, 'fund_name': name_list, 'fund_cat': fund_cat, + fund_df = pd.DataFrame({'id': id_list, 'fund_code': code_list, 'morning_star_code': morning_star_code_list, 'fund_name': name_list, 'fund_cat': fund_cat, 'fund_rating_3': fund_rating_3, 'fund_rating_5': fund_rating_5, 'rate_of_return': rate_of_return}) sql_insert = "replace into fund_morning_star(`id`, `fund_code`,`morning_star_code`, `fund_name`, `fund_cat`, `fund_rating_3`, `fund_rating_5`, `rate_of_return`) values(%s, %s, %s, %s, %s, %s, %s, %s)" + # print('fund_df', fund_df) fund_list = fund_df.values.tolist() # cursor.executemany(sql_insert, fund_list) # connect.commit() - # sql_insert = "insert into fund_morning_star(`fund_code`, `fund_name`, `fund_cat`, `fund_rate_3`, `fund_rate_5`, `rate_of_return`) values(%s, %s, %s, %s, %s, %s)" - # ALTER TABLE fund_morning_star MODIFY COLUMN update_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP print('fund_list', fund_list) with open(result_dir + 'fund_morning_star.csv', 'a') as csv_file: for fund_item in fund_list: diff --git a/src/assets/star/tmp.gif b/src/assets/star/tmp.gif index d9969ad..cefcb51 100644 Binary files a/src/assets/star/tmp.gif and b/src/assets/star/tmp.gif differ diff --git a/src/utils.py b/src/utils.py index 55fb63f..b7484b2 100644 --- a/src/utils.py +++ b/src/utils.py @@ -41,7 +41,7 @@ def identify_verification_code(chrome_driver, id="checkcodeImg"): directory_time = time.strftime("%Y-%m-%d", time.localtime(time.time())) # 获取到当前文件的目录,并检查是否有 directory_time 文件夹,如果不存在则自动新建 directory_time 文件 try: - file_Path = os.getcwd() + '/' + directory_time + '/' + file_Path = os.getcwd() + '/code-record/' + directory_time + '/' if not os.path.exists(file_Path): os.makedirs(file_Path) print("目录新建成功:%s" % file_Path) @@ -52,7 +52,7 @@ def identify_verification_code(chrome_driver, id="checkcodeImg"): try: from selenium.webdriver.common.by import By ele = chrome_driver.find_element(By.ID, id) - code_path = './' + directory_time + '/' + picture_time + '_code.png' + code_path = './code-record/' + directory_time + '/' + picture_time + '_code.png' url = ele.screenshot(code_path) if url: print("%s :截图成功!!!" % url) @@ -99,7 +99,7 @@ def login_site(chrome_driver, site_url): password = chrome_driver.find_element_by_id('pwdValue') check_code = chrome_driver.find_element_by_id('txtCheckCode') username.send_keys('18219112108@163.com') - password.send_keys('w780880') + password.send_keys('xxxx') count = 1 flag = True while count < 10 and flag: