"""Download the TWSE ISIN security table and export it to an Excel file.

Steps:
1. Fetch the ISIN page (``strMode=2``) with ``requests``.
2. Parse the first HTML table on the page into a DataFrame.
3. Drop the first data row and keep only rows whose ``備註`` column is empty.
4. Split ``有價證券代號及名稱`` into ``有價證券代號`` and ``名稱`` columns.
5. Write the result to ``OUTPUT_PATH``.
"""
from io import StringIO
from pathlib import Path

import numpy as np  # kept from the original script (not used below)
import pandas as pd
import requests
from bs4 import BeautifulSoup  # kept from the original script (not used below)

ISIN_URL = 'https://isin.twse.com.tw/isin/C_public.jsp?strMode=2'
OUTPUT_PATH = Path(r'D:\data\Test3.xlsx')
REQUEST_TIMEOUT = 300  # seconds

# Display limits only affect how DataFrames are printed while debugging.
# The original set max_rows twice; only the last (effective) value is kept.
pd.options.display.max_rows = 3000
pd.options.display.max_columns = 40

# 1) Scrape the web page.
# The page is fetched with requests instead of handing the URL to
# pd.read_html directly: per the original author's note, the direct call
# only showed 288 rows.
r = requests.get(ISIN_URL, timeout=REQUEST_TIMEOUT)
# Fail early on an HTTP error instead of trying to parse an error page.
r.raise_for_status()

# Wrap the markup in StringIO: passing a literal HTML string to read_html is
# deprecated since pandas 2.1. `encoding` is kept as in the original call.
df = pd.read_html(StringIO(r.text), encoding='big5', header=0)[0]

# Drop the first row (the "股票..." heading row, per the original comment).
df = df.iloc[1:]
# Keep only the rows whose remarks column (備註) is blank.
df = df[df['備註'].isnull()]

# Split "code name" into two columns. Splitting on any whitespace with n=1
# also copes with a full-width space (U+3000) separator and with names that
# contain spaces themselves; the original split(' ') raised ValueError
# whenever an entry did not yield exactly two parts.
# reindex guarantees both columns exist even if no entry had a separator.
df2 = (
    df['有價證券代號及名稱']
    .str.split(n=1, expand=True)
    .reindex(columns=[0, 1])
)
df2.columns = ['有價證券代號', '名稱']

# Row indexes are identical on both sides, so an index-on-index merge simply
# appends the two new columns.
df = pd.merge(df, df2, right_index=True, left_index=True)

# Make sure the target directory exists before writing the workbook.
OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
df.to_excel(OUTPUT_PATH, index=False, header=True)
# ("Copy code" button label left over from the source web page; not part of the script.)