DuckDB

Published by onesixx on

CRUD

# import duckdb
# duckdb.sql("SELECT 42").show()

# r1 = duckdb.sql("SELECT 42 AS i")
# duckdb.sql("SELECT i*2 AS k FROM r1").show()

import duckdb
import pandas as pd
import pydataset as ds
# ["coal", "nuclear", "Journals", "cabbages", "AustralianElectionPolling", "Bundesliga"]

# Create a connection to DuckDB
conn = duckdb.connect('a_database.db')  # in-memory database
conn.execute("SHOW TABLES").fetchdf()
# conn.colse()
# C 
# 테이블 생성
conn.execute('''
CREATE TABLE daily(
    date DATE,
    name VARCHAR,
    saved BOOLEAN,
    UNIQUE(date, name)
);
''')

# 데이터 삽입
conn.execute('''
INSERT INTO daily (date, name, saved) VALUES
('2024-05-04', 'Daily', True),
('2024-05-05', 'Daily', False);
''')
###### R
# 데이터 조회
result0= conn.query('SELECT * FROM daily').to_df()       # prepare query & excute DataFrame
result1= conn.execute('SELECT * FROM daily').fetchdf()   # excutes & fetch DataFrame
result2= conn.execute('SELECT * FROM daily').fetchall()  # list of tuples

###### U 
# 데이터 갱신
conn.execute('''
UPDATE daily
SET saved = False
WHERE date = '2024-05-04';
''')

conn.execute('''
INSERT INTO daily (date, name, saved)
VALUES ('2024-05-04', 'Daily', TRUE)
ON CONFLICT (date, name)
DO UPDATE SET saved = excluded.saved;
''')

# insert Dataframe
data = {
    'date': ['2024-05-06', '2024-05-07'],
    'name': ['Daily', 'Daily'],
    'saved': [True, False]
}
df = pd.DataFrame(data) 
for index, row in df.iterrows():
    query = f'''
    INSERT INTO daily (date, name, saved)
    VALUES ('{row['date']}', '{row['name']}', {row['saved']})
    ON CONFLICT (date, name)
    DO UPDATE SET saved = excluded.saved;
    '''
    conn.execute(query)

for index, row in df.iterrows():
    conn.execute('''
    INSERT INTO daily (date, name, saved)
    VALUES (?, ?, ?)
    ON CONFLICT (date, name)
    DO UPDATE SET saved = excluded.saved;
    ''', (row['date'], row['name'], row['saved']))

###### D
conn.execute('''
DELETE FROM daily
WHERE name = 'Jane Doe';
''')

conn.execute('''
Drop TABLE daily;
''')


#--------------------------------
conn.execute("""
SELECT table_name 
FROM information_schema.tables 
WHERE table_type = 'VIEW'
""").fetchdf()


# conn = duckdb.connect(':memory:')

# conn.execute("CREATE SEQUENCE seq_personid START 1;")
# conn.execute('''
#     create table customer(
#         id integer primary key default nextval('seq_personid'),
#         name varchar,
#         email varchar,
#         age int
#     )   
# ''')

sample_dataframe = ds.data('AustralianElectionPolling')
conn.execute("select * from sample_dataframe").fetchdf()
conn.register('sample_dataframe_view', sample_dataframe)

conn.execute("CREATE TABLE my_table AS SELECT * FROM sample_dataframe_view")
conn.execute(f"select * from my_table").fetchdf()

conn.execute(f"SELECT * FROM my_table").fetchall()

def run_sql(sql):
    result = conn.execute(sql)
    return result.fetchall()
column_metadata = run_sql(f'PRAGMA table_info (my_table);')
columnNm = [col[1] for col in column_metadata]

result = conn.execute(f"SELECT * FROM my_table")
df = pd.DataFrame(result.fetchall(), columns=columnNm)

df.equals(sample_dataframe) # False
# df None , sample_dataframe NaN


df_sorted               = df.sort_values(by=df.columns.tolist()).reset_index(drop=True)
sample_dataframe_sorted = sample_dataframe.sort_values(by=sample_dataframe.columns.tolist()).reset_index(drop=True)
are_data_same = df_sorted.equals(sample_dataframe_sorted)

query. excute

conn.query(qry).to_df()
conn.execute(qry).fetchdf()

update

import duckdb

try:
    # Assuming 'a_database.duckdb' is your database file.
    # Ensure the database is not opened in read-only mode.
    conn = duckdb.connect(database='a_database.duckdb', read_only=False)
    
    # Example query - adjust as needed
    qry = "YOUR QUERY HERE"
    
    # Execute a query to test - replace with your actual query
    result = conn.execute(qry)
    
    # If the query is a SELECT, fetch results
    if qry.strip().upper().startswith("SELECT"):
        df = result.fetchdf()
        print(df)
    # Ensure to commit if the query modifies the database
    else:
        conn.commit()
    
except duckdb.InvalidInputException as e:
    print(f"Error executing query: {e}")
finally:
    conn.close()
Categories: shiny

onesixx

Blog Owner

Subscribe
Notify of
guest

0 Comments
Oldest
Newest Most Voted
Inline Feedbacks
View all comments
0
Would love your thoughts, please comment.x
()
x