Data :: wine

Published by onesixx on

wine Quality

The Wine Quality data set (http://archive.ics.uci.edu/ml/datasets/Wine+Quality) comes from the UC Irvine Machine Learning Repository.

It gives us a nice mix of classification and regression problems to test on.

Two datasets are included, related to red and white vinho verde wine samples, from the north of Portugal. The goal is to model wine quality based on physicochemical tests (see [Cortez et al., 2009], [Web Link]).

 

Input variables (based on physicochemical tests): 
1 – fixed acidity 
2 – volatile acidity 
3 – citric acid 
4 – residual sugar 
5 – chlorides 
6 – free sulfur dioxide 
7 – total sulfur dioxide 
8 – density 
9 – pH 
10 – sulphates 
11 – alcohol 
Output variable (based on sensory data): 
12 – quality (score between 0 and 10)

# Build the combined red/white Wine Quality table and cache it as an RDS file.
library(data.table)  # data.table() and the data.table-aware rbind()

PATH_ADATA <- "/Users/onesixx/Dropbox/Rhome/aData/"
url_wineQuality_red   <- 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
url_wineQuality_white <- 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv'

# Both files are read as semicolon-separated tables with a header row.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
wineQuality_red <- read.table(url_wineQuality_red,
                              header = TRUE, sep = ";", stringsAsFactors = FALSE)
wineQuality_white <- read.table(url_wineQuality_white,
                                header = TRUE, sep = ";", stringsAsFactors = FALSE)

# Tag every row with its source before stacking the two tables.
wineQ_red   <- data.table(wineQuality_red,   color = "red")
wineQ_white <- data.table(wineQuality_white, color = "white")
wineQ <- rbind(wineQ_red, wineQ_white)

# Drop the intermediates; only PATH_ADATA and wineQ are needed from here on.
rm(url_wineQuality_red, url_wineQuality_white, wineQuality_red, wineQuality_white, wineQ_red, wineQ_white)

# Derived columns, all stored as factors:
#   is_red       - 1 for rows from the red file, 0 otherwise
#   high_quality - 1 when quality > 6, 0 otherwise (computed while quality is still numeric)
#   quality      - the original score converted to a factor
wineQ$is_red       <- as.factor(ifelse(wineQ$color == "red", 1, 0))
wineQ$high_quality <- as.factor(ifelse(wineQ$quality > 6, 1, 0))
wineQ$quality      <- as.factor(wineQ$quality)

# PATH_ADATA already ends in "/", so plain concatenation yields the file path.
# paste0() is base R, so stringr does not have to be attached for this step.
saveRDS(wineQ, paste0(PATH_ADATA, "wineQ.rds"))
# Reload from the cache; this is the same call later snippets use to start from the saved file.
wineQ <- readRDS(paste0(PATH_ADATA, "wineQ.rds"))

 

### Data Original
# Reload the cached wine-quality table and keep a handful of columns.
library(data.table)  # needed so `[` on the reloaded table accepts `with = FALSE`

PATH_ADATA <- "/Users/onesixx/Dropbox/Rhome/aData/"
# PATH_ADATA ends in "/", so base paste0() is enough to build the file path.
wineQ <- readRDS(paste0(PATH_ADATA, "wineQ.rds"))
cols <- c('is_red', 'fixed.acidity', 'density', 'pH', 'alcohol')
# with = FALSE makes data.table treat `cols` as a character vector of column
# names rather than as an expression evaluated inside the table.
dd <- wineQ[, cols, with = FALSE]

 

wine 

 

#* Data Loading ----
# Download the UCI "wine" file. It is comma-separated and read with
# header = FALSE, so the column names are supplied explicitly below.
wineUrl <- "http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data"
wine <- read.table(
  file = wineUrl,
  sep = ",",
  header = FALSE,
  col.names = c(
    "Cultivar",
    "Alcohol",
    "Malic.acid",
    "Ash",
    "Alcalinity.of.ash",
    "Magnesium",
    "Total.phenols",
    "Flavanoids",
    "Nonflavanoid.phenols",
    "Proanthocyanin",
    "Color.intensity",
    "Hue",
    "OD280.OD315.of.diluted.wines",
    "Proline"
  ),
  stringsAsFactors = FALSE
)

 

Wine price

 

# Load the BC Liquor Store product price list and reduce it to a small,
# friendlier-named table of the four product classes listed in `products`.
rawDataUrl <- "http://pub.data.gov.bc.ca/datasets/176284/BC_Liquor_Store_Product_Price_List.csv"
bcl <- read.csv(rawDataUrl, stringsAsFactors = FALSE)
products <- c("BEER", "REFRESHMENT BEVERAGE", "SPIRITS", "WINE")

# Keep only rows whose product class is one of `products`.
bcl <- dplyr::filter(bcl, PRODUCT_CLASS_NAME %in% products)

# Select and rename in one step (new_name = old_name). Every dplyr verb is
# namespace-qualified, so the result does not depend on dplyr being attached
# or on another attached package masking rename().
bcl <- dplyr::select(
  bcl,
  Type            = PRODUCT_CLASS_NAME,
  Subtype         = PRODUCT_MINOR_CLASS_NAME,
  Name            = PRODUCT_LONG_NAME,
  Country         = PRODUCT_COUNTRY_ORIGIN_NAME,
  Alcohol_Content = PRODUCT_ALCOHOL_PERCENT,
  Price           = CURRENT_DISPLAY_PRICE,
  Sweetness       = SWEETNESS_CODE
)

# Shorten the one multi-word class label; the anchors restrict it to exact matches.
bcl$Type <- sub("^REFRESHMENT BEVERAGE$", "REFRESHMENT", bcl$Type)
head(bcl)

 

 

 

Categories: Reshaping

onesixx

Blog Owner

Subscribe
Notify of
guest

0 Comments
Inline Feedbacks
View all comments
0
Would love your thoughts, please comment.x
()
x