datasets
pydataset
from pydataset import data

data()                      # with no argument: list all available datasets
mtcars = data('mtcars')
print(mtcars.head())
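Since data() with no argument returns the catalog as a DataFrame, it can be searched by keyword. A minimal sketch, assuming the catalog columns are named dataset_id and title:

from pydataset import data

catalog = data()    # catalog of all bundled datasets
# 'title' column name is an assumption; inspect catalog.columns if it differs
hits = catalog[catalog['title'].str.contains('car', case=False)]
print(hits)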
vega_datasets
import ssl
import urllib.request

# Disable SSL certificate verification
ssl._create_default_https_context = ssl._create_unverified_context

import vega_datasets as ds

ds.data('7zip')    # now loads without the certificate error

for i in ds.data.list_datasets():
    try:
        df = ds.data(i)
        print(f"{i} : {df.shape}")
        print(df.head())
    except Exception:
        print(i)    # not loadable as a DataFrame (e.g. images, TopoJSON)
# ds.data.list_datasets() returns:
['7zip',
'airports',
'annual-precip',
'anscombe',
'barley',
'birdstrikes',
'budget',
'budgets',
'burtin',
'cars',
'climate',
'co2-concentration',
'countries',
'crimea',
'disasters',
'driving',
'earthquakes',
'ffox',
'flare',
'flare-dependencies',
'flights-10k',
'flights-200k',
'flights-20k',
'flights-2k',
'flights-3m',
'flights-5k',
'flights-airport',
'gapminder',
'gapminder-health-income',
'gimp',
'github',
'graticule',
'income',
'iowa-electricity',
'iris',
'jobs',
'la-riots',
'londonBoroughs',
'londonCentroids',
'londonTubeLines',
'lookup_groups',
'lookup_people',
'miserables',
'monarchs',
'movies',
'normal-2d',
'obesity',
'ohlc',
'points',
'population',
'population_engineers_hurricanes',
'seattle-temps',
'seattle-weather',
'sf-temps',
'sp500',
'stocks',
'udistrict',
'unemployment',
'unemployment-across-industries',
'uniform-2d',
'us-10m',
'us-employment',
'us-state-capitals',
'volcano',
'weather',
'weball26',
'wheat',
'windvectors',
'world-110m',
'zipcodes']
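If the download keeps failing, note that a handful of these datasets ship inside the package and load without any network access. A minimal sketch using the bundled local_data loader (only a subset of names is available locally):

from vega_datasets import local_data

print(local_data.list_datasets())   # the locally bundled subset
df = local_data.cars()              # no HTTPS request, so no certificate issue
print(df.head())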
import vega_datasets as ds

ds.data.cars()     # attribute access
ds.data('iris')    # call syntax; works for any dataset name
#---------------------------------------------------------------------------
ds.data('7zip')
# error [SSL: CERTIFICATE_VERIFY_FAILED]
# certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1006)>
# Installing certificates
import certifi
import ssl
import urllib.request
# Locate the CA (certificate authority) bundle installed with Python
print(certifi.where())
# ❯ python -m certifi
# /Users/onesixx/Library/r-miniconda/envs/py11/lib/python3.11/site-packages/certifi/cacert.pem
cafile = certifi.where()
ssl_context = ssl.create_default_context(cafile=cafile)
# export SSL_CERT_FILE=$(python -m certifi)
# export REQUESTS_CA_BUNDLE=$(python -m certifi)
# Make urllib's urlopen use this SSL context instead of the default one.
# Dataset objects expose a .url property; '7zip' is not a valid Python
# identifier, so getattr() stands in for the usual ds.data.<name> access
# (assumption: attribute lookup also works for this name).
url = getattr(ds.data, '7zip').url    # URL of the '7zip' dataset, nothing downloaded yet
request = urllib.request.urlopen(url, context=ssl_context)    # fetch with the SSL context
# Process the response (e.g. read and parse the data);
# adjust this part to suit the actual dataset.
data = request.read()
print(data)    # or parse the data before use
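The same certifi bundle can also be wired into urllib globally, so ds.data(...) works unchanged afterwards. A minimal sketch of this commonly used workaround (it relies on the same private _create_default_https_context hook used above):

import ssl
import certifi
import vega_datasets as ds

# Swap the default HTTPS context factory for one that trusts certifi's CA bundle
ssl._create_default_https_context = (
    lambda: ssl.create_default_context(cafile=certifi.where())
)
df = ds.data('iris')    # downloads without CERTIFICATE_VERIFY_FAILED
print(df.head())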
seaborn
import seaborn as sns

titanic = sns.load_dataset('titanic')

sns.get_dataset_names()
# ['anagrams', 'anscombe', 'attention', 'brain_networks', 'car_crashes',
#  'diamonds', 'dots', 'dowjones', 'exercise', 'flights', 'fmri', 'geyser',
#  'glue', 'healthexp', 'iris', 'mpg', 'penguins', 'planets', 'seaice',
#  'taxis', 'tips', 'titanic']

for i in sns.get_dataset_names():
    print(i)
    df = sns.load_dataset(i)
    print(df.head())
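load_dataset() fetches each dataset over HTTPS from the seaborn-data GitHub repository and caches it locally; the cache and data_home keyword arguments of its documented signature control this. A minimal sketch:

import seaborn as sns

# First call downloads and caches; later calls read from data_home
tips = sns.load_dataset('tips', cache=True, data_home='~/seaborn-data')
print(tips.head())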
Example
import seaborn as sns
import pandas as pd

# Load dataset
df = sns.load_dataset("dowjones")

# Convert 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Convert 'Date' column to string format with the desired date format
df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')
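To sanity-check the series before reformatting it, it can be plotted directly. A small sketch, assuming the dowjones columns are Date and Price:

import seaborn as sns

df = sns.load_dataset("dowjones")
# Column names assumed: 'Date' (datetime) and 'Price' (float)
ax = sns.lineplot(data=df, x="Date", y="Price")
ax.set_title("Dow Jones index over time")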
plotly.express
import plotly.express as px

iris = px.data.iris()
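plotly.express has no list function, but the available loaders can be discovered with dir(), the same pattern the scikit-learn/statsmodels section below uses. A minimal sketch:

import plotly.express as px

# Most public names in px.data are loader functions returning a DataFrame
loaders = [name for name in dir(px.data) if not name.startswith('_')]
print(loaders)

df = px.data.gapminder()
print(df.head())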
palmerpenguins

from palmerpenguins import load_penguins

penguins = load_penguins()
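load_penguins() returns a plain pandas DataFrame, so the usual inspection methods apply. A small sketch (the row count refers to the packaged data):

from palmerpenguins import load_penguins

penguins = load_penguins()
print(penguins.shape)                        # 344 rows in the packaged data
print(penguins['species'].value_counts())    # Adelie, Gentoo, Chinstrap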
scikit-learn / statsmodels

# pip install scikit-learn
import pandas as pd

### 1. scikit-learn datasets
import sklearn.datasets as sk_datasets

[i for i in dir(sk_datasets) if 'load' in i]

iris = sk_datasets.load_iris()    # returns a Bunch (dict-like) object, not a tuple
print(iris.DESCR)
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['Species'] = iris.target

### 2. statsmodels datasets
import statsmodels.api as sm

[i for i in dir(sm.datasets) if not i.startswith('_')]

df = sm.datasets.get_rdataset('iris').data

### 3. vega_datasets
import vega_datasets as vega

dir(vega.data)
df = vega.data.iris()

# from tabulate import tabulate
# print(tabulate(df.sample(10), headers='keys'))
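In scikit-learn 0.23+ the manual pd.DataFrame construction above can be skipped with the as_frame option. A minimal sketch:

from sklearn.datasets import load_iris

# as_frame=True makes the Bunch carry pandas objects
iris = load_iris(as_frame=True)
df = iris.frame    # features plus a 'target' column
print(df.head())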