datasets
pydataset
from pydataset import data
data()  # list all available datasets (returns a DataFrame index)
mtcars = data('mtcars')
print(mtcars.head())
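The index returned by data() can be filtered like any other DataFrame; a minimal sketch for searching it, assuming the index carries dataset_id and title columns (true for current pydataset releases):
from pydataset import data
idx = data()  # one row per bundled dataset
hits = idx[idx['title'].str.contains('car', case=False)]  # titles mentioning "car"
print(hits)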
vega_datasets
import ssl
import urllib.request
# Disable SSL certificate verification (quick workaround for self-signed certs in the chain)
ssl._create_default_https_context = ssl._create_unverified_context
import vega_datasets as ds
ds.data('7zip')
for i in ds.data.list_datasets():
    try:
        df = ds.data(i)
        print(f"{i} : {df.shape}")
        print(df.head())
    except Exception:
        # some entries (e.g. image or topojson files) do not load as DataFrames
        print(i)
['7zip',
'airports',
'annual-precip',
'anscombe',
'barley',
'birdstrikes',
'budget',
'budgets',
'burtin',
'cars',
'climate',
'co2-concentration',
'countries',
'crimea',
'disasters',
'driving',
'earthquakes',
'ffox',
'flare',
'flare-dependencies',
'flights-10k',
'flights-200k',
'flights-20k',
'flights-2k',
'flights-3m',
'flights-5k',
'flights-airport',
'gapminder',
'gapminder-health-income',
'gimp',
'github',
'graticule',
'income',
'iowa-electricity',
'iris',
'jobs',
'la-riots',
'londonBoroughs',
'londonCentroids',
'londonTubeLines',
'lookup_groups',
'lookup_people',
'miserables',
'monarchs',
'movies',
'normal-2d',
'obesity',
'ohlc',
'points',
'population',
'population_engineers_hurricanes',
'seattle-temps',
'seattle-weather',
'sf-temps',
'sp500',
'stocks',
'udistrict',
'unemployment',
'unemployment-across-industries',
'uniform-2d',
'us-10m',
'us-employment',
'us-state-capitals',
'volcano',
'weather',
'weball26',
'wheat',
'windvectors',
'world-110m',
'zipcodes']
import vega_datasets as ds
ds.data.cars()
ds.data('iris')
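Each dataset accessor also exposes metadata, and a few datasets ship inside the package so they work offline; a minimal sketch using the .url / .description attributes and the local_data loader (all part of the vega_datasets API, per its README):
print(ds.data.cars.url)          # remote URL the loader fetches from
print(ds.data.iris.description)  # short description (available for bundled datasets)
from vega_datasets import local_data
print(local_data.iris().head())  # bundled copy, no network needed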
#---------------------------------------------------------------------------
ds.data('7zip')
# error [SSL: CERTIFICATE_VERIFY_FAILED]
# certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1006)>
# Fix: point Python at an installed CA certificate bundle
import certifi
import ssl
import urllib.request
# Locate the CA (certificate authority) bundle installed with Python
print(certifi.where())
# ❯ python -m certifi
# /Users/onesixx/Library/r-miniconda/envs/py11/lib/python3.11/site-packages/certifi/cacert.pem
cafile = certifi.where()
ssl_context = ssl.create_default_context(cafile=cafile)
# export SSL_CERT_FILE=$(python -m certifi)
# export REQUESTS_CA_BUNDLE=$(python -m certifi)
# Make urllib's urlopen use the SSL context
url = getattr(ds.data, '7zip').url  # URL of the '7zip' dataset (getattr, since the name starts with a digit)
request = urllib.request.urlopen(url, context=ssl_context)  # open the request with the SSL context
# Process the response (e.g. read and parse it);
# adjust this step to the actual format of the dataset.
data = request.read()
print(data)  # or parse the data before using it
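If the requests library is available, it verifies TLS against certifi's CA bundle by default, so the manual SSL context above can be skipped; a minimal alternative sketch (same getattr URL lookup as above):
import requests
url = getattr(ds.data, '7zip').url
resp = requests.get(url, timeout=30)
resp.raise_for_status()  # fail loudly on HTTP errors
print(len(resp.content), 'bytes downloaded')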
seaborn
import seaborn as sns
titanic = sns.load_dataset('titanic')
sns.get_dataset_names()
['anagrams',
'anscombe',
'attention',
'brain_networks',
'car_crashes',
'diamonds',
'dots',
'dowjones',
'exercise',
'flights',
'fmri',
'geyser',
'glue',
'healthexp',
'iris',
'mpg',
'penguins',
'planets',
'seaice',
'taxis',
'tips',
'titanic']
for i in sns.get_dataset_names():
    print(i)
    df = sns.load_dataset(i)
    print(df.head())
Example
import seaborn as sns
import pandas as pd
# Load dataset
df = sns.load_dataset("dowjones")
# Convert 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])
# Convert 'Date' column to string format with desired date format
df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')
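With the data loaded, the series plots directly; a minimal sketch that converts Date back to datetime first (the string column above would be treated as categorical on the x-axis):
import matplotlib.pyplot as plt
df_plot = df.copy()
df_plot['Date'] = pd.to_datetime(df_plot['Date'])  # datetime gives a proper time axis
sns.lineplot(data=df_plot, x='Date', y='Price')
plt.show()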
plotly.express
import plotly.express as px
iris = px.data.iris()
palmerpenguins
from palmerpenguins import load_penguins
penguins = load_penguins()
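A quick sanity check that both frames load; a minimal sketch (px.data.iris() uses snake_case column names such as sepal_width and species):
fig = px.scatter(iris, x='sepal_width', y='sepal_length', color='species')
fig.show()
print(penguins.head())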
# pip install scikit-learn
import pandas as pd
### 1. scikit-learn datasets
import sklearn.datasets as sk_datasets
[i for i in dir(sk_datasets) if 'load' in i]  # list the load_* helpers
iris = sk_datasets.load_iris()  # returns a Bunch (dict-like) with .data, .target, .DESCR, ...
print(iris.DESCR)
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['Species'] = iris.target
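On scikit-learn 0.23+ the same frame comes back in one step via as_frame=True, skipping the manual DataFrame construction above:
iris_df = sk_datasets.load_iris(as_frame=True).frame  # features plus a 'target' column
print(iris_df.head())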
### 2. statsmodels datasets
import statsmodels.api as sm
[i for i in dir(sm.datasets) if not i.startswith('_')]
df = sm.datasets.get_rdataset('iris').data
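Note that get_rdataset downloads from the Rdatasets repository over the network; statsmodels also bundles its own datasets, loadable offline with load_pandas. A minimal sketch using the built-in longley dataset:
longley = sm.datasets.longley.load_pandas()
print(longley.data.head())  # .data is a pandas DataFrame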
### 3. vega_datasets
import vega_datasets as vega
dir(vega.data)
df = vega.data.iris()
# from tabulate import tabulate
# print(tabulate(df.sample(10), headers='keys'))