Data Prep

## Markdown Setup

# Capture the wall-clock time at the start of the run.
# NOTE(review): presumably compared with a later timestamp to report total
# run time; that code is not visible in this part of the file.
start.time <- Sys.time()

# Default options applied to every knitr chunk: echo the code, suppress
# warnings and messages, and configure the widgetframe options.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  widgetframe_self_contained = FALSE,
  widgetframe_isolate_widgets = TRUE
)

#Read in custom functions
source("D:/data_projects/functions/eda_libraries.R")
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:e1071':
## 
##     impute
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
source("D:/data_projects/functions/audit_numeric_summary.R")
source("D:/data_projects/functions/audit_numeric_viz.R")
source("D:/data_projects/functions/audit_factor_summary.R")

# Fix the random number generator state so the run is repeatable.
set.seed(218)

# Load the raw basket file. header = FALSE means the first line is read as
# data, and R assigns default column names (V1, V2, ...). Columns are
# renamed further down, after cleaning.
mb_data <- read.csv(
  file = "D:/data_projects/market_basket_analysis/Market_Basket_Optimisation.csv",
  header = FALSE
)

# Preview the first five raw rows as an interactive table.
mb_data %>%
  head(5) %>%
  datatable(
    rownames = FALSE,
    class = 'cell-border stripe compact hover',
    caption = "Inspect Raw Data: First 5 Rows"
  )
# Strip every whitespace character from each cell. Leading, trailing and
# inner spaces (e.g. between the words of two-word item names) are hard to
# tell apart consistently in the raw file, so all of them are removed to get
# one canonical spelling per item.
# lapply() processes the table column by column; apply() would first coerce
# the whole data frame to a matrix via as.matrix(). as.data.frame() keeps the
# same character/factor outcome the original matrix conversion produced.
mb_data <- as.data.frame(
  lapply(mb_data, function(column) gsub("\\s+", "", column))
)

# Add a sequential identifier as the first column.
mb_data <- tibble::rowid_to_column(mb_data, "transaction_id")

# Name the remaining columns item_1 ... item_k from the actual column count
# instead of a hand-typed list, so a file with more or fewer item columns
# does not break the assignment. With 20 item columns this yields exactly
# item_1 ... item_20.
colnames(mb_data) <- c(
  "transaction_id",
  paste0("item_", seq_len(ncol(mb_data) - 1L))
)

# Preview the first five rows after whitespace removal and renaming.
mb_data %>%
  head(5) %>%
  datatable(
    rownames = FALSE,
    class = 'cell-border stripe compact hover',
    caption = "Inspect Clean Data: First 5 Rows"
  )
### One Column Dataframe containing all items purchased at this store.
# Keep only the item columns (everything except the transaction identifier).
mb_data_no_id <- mb_data[, names(mb_data) != "transaction_id", drop = FALSE]

# Stack all item columns into one long "item" column.
mb_data_items <- data.frame(item = unlist(mb_data_no_id, use.names = FALSE))

# Empty cells are unused basket slots: mark them as missing, then drop them.
mb_data_items$item[mb_data_items$item == ""] <- NA
mb_data_items <- mb_data_items %>%
  na.omit() %>%
  data.frame()

Data Summary

Items Sold Summary

# Frequency summary of the stacked item column; the rendered output below
# lists each item with its count and pct.
# NOTE(review): audit_factor_summary() comes from a sourced project helper, so
# the exact effect of `presentation = TRUE` is not visible here — confirm.
audit_factor_summary(mb_data_items, presentation = TRUE)
## [1] "item"

## 

item

count

pct

mineralwater

1788

0.0609

eggs

1348

0.0459

spaghetti

1306

0.0445

frenchfries

1282

0.0437

chocolate

1230

0.0419

greentea

991

0.0337

milk

972

0.0331

groundbeef

737

0.0251

frozenvegetables

715

0.0244

pancakes

713

0.0243

burgers

654

0.0223

cake

608

0.0207

cookies

603

0.0205

escalope

595

0.0203

lowfatyogurt

574

0.0195

shrimp

536

0.0183

tomatoes

513

0.0175

oliveoil

494

0.0168

frozensmoothie

475

0.0162

turkey

469

0.0160

chicken

450

0.0153

wholewheatrice

439

0.0150

gratedcheese

393

0.0134

cookingoil

383

0.0130

soup

379

0.0129

herb&pepper

371

0.0126

honey

356

0.0121

champagne

351

0.0120

freshbread

323

0.0110

salmon

319

0.0109

brownies

253

0.0086

avocado

250

0.0085

hotdogs

243

0.0083

cottagecheese

239

0.0081

tomatojuice

228

0.0078

butter

226

0.0077

wholewheatpasta

221

0.0075

redwine

211

0.0072

yogurtcake

205

0.0070

lightmayo

204

0.0069

energybar

203

0.0069

ham

203

0.0069

energydrink

200

0.0068

pepper

199

0.0068

cereals

193

0.0066

vegetablesmix

193

0.0066

muffins

181

0.0062

oil

173

0.0059

frenchwine

169

0.0058

freshtuna

167

0.0057

strawberries

160

0.0054

meatballs

157

0.0053

almonds

153

0.0052

parmesancheese

149

0.0051

mushroomcreamsauce

143

0.0049

rice

141

0.0048

proteinbar

139

0.0047

mint

131

0.0045

whitewine

124

0.0042

pasta

118

0.0040

lightcream

117

0.0040

carrots

115

0.0039

blacktea

107

0.0036

tomatosauce

106

0.0036

fromageblanc

102

0.0035

gums

101

0.0034

eggplant

99

0.0034

extradarkchocolate

90

0.0031

melons

90

0.0031

bodyspray

86

0.0029

yams

86

0.0029

magazines

82

0.0028

barbecuesauce

81

0.0028

cider

79

0.0027

nonfatmilk

78

0.0027

candybars

73

0.0025

zucchini

71

0.0024

wholeweatflour

70

0.0024

blueberries

69

0.0023

salt

69

0.0023

flaxseed

68

0.0023

greengrapes

68

0.0023

antioxydantjuice

67

0.0023

bacon

65

0.0022

bugspray

65

0.0022

greenbeans

65

0.0022

clothesaccessories

63

0.0021

toothpaste

61

0.0021

shallot

58

0.0020

strongcheese

58

0.0020

spinach

53

0.0018

glutenfreebar

52

0.0018

petfood

49

0.0017

soda

47

0.0016

sparklingwater

47

0.0016

chili

46

0.0016

mayonnaise

46

0.0016

pickles

45

0.0015

burgersauce

44

0.0015

mintgreentea

42

0.0014

handproteinbar

39

0.0013

salad

37

0.0013

shampoo

37

0.0013

asparagus

36

0.0012

cauliflower

36

0.0012

corn

36

0.0012

babiesfood

34

0.0012

sandwich

34

0.0012

dessertwine

33

0.0011

ketchup

33

0.0011

oatmeal

33

0.0011

chocolatebread

32

0.0011

chutney

31

0.0011

mashedpotato

31

0.0011

tea

29

0.0010

bramble

14

0.0005

cream

7

0.0002

napkins

5

0.0002

waterspray

3

0.0001

## [1] "Done Processing"
## [1] "1 variables processed"
## [1] "Function Efficiency Statistics"
##    user  system elapsed 
##    1.00    0.08    1.09

NA Summary

# Mark empty cells as missing so they are counted as NA below.
mb_data[mb_data == ""] <- NA

# Non-missing cells per column. colSums() over the is.na() matrix replaces the
# deprecated summarise_all(funs(...)) followed by t().
df_1 <- data.frame(non_na_values = colSums(!is.na(mb_data)))

# Missing cells per column.
df_2 <- data.frame(na_values = colSums(is.na(mb_data)))

# Combine both counts into one table keyed by column name.
# The earlier merge(by = 0) approach reset the row names to 1..n, so the
# `names` column ended up holding row indices instead of variable names (the
# real names sat in a stray `Row.names` column). Both count vectors are in
# mb_data column order, so they can be bound directly.
df_3 <- data.frame(
  names = names(mb_data),
  non_na_values = df_1$non_na_values,
  na_values = df_2$na_values
)

# Most complete columns first.
df_3 <- df_3[order(-df_3$non_na_values), ]

datatable(df_3,
          rownames = FALSE,
          class = 'cell-border stripe compact hover',
          caption = "NA Summary")

Sales by item position (one table per item_1 … item_20 column)

# Same frequency summary, run on the full table: the rendered output below
# shows one item/count/pct table per item_N column.
# NOTE(review): transaction_id does not appear in the output, so the helper
# presumably skips non-character/non-factor columns — confirm in its source.
audit_factor_summary(mb_data, presentation=TRUE)
## [1] "item_1"

## 

item_1

count

pct

mineralwater

577

0.0769

burgers

576

0.0768

turkey

458

0.0611

chocolate

391

0.0521

frozenvegetables

373

0.0497

spaghetti

354

0.0472

shrimp

325

0.0433

gratedcheese

293

0.0391

eggs

279

0.0372

cookies

270

0.0360

frenchfries

244

0.0325

herb&pepper

232

0.0309

groundbeef

218

0.0291

tomatoes

212

0.0283

milk

181

0.0241

escalope

143

0.0191

freshtuna

129

0.0172

redwine

123

0.0164

ham

120

0.0160

cake

98

0.0131

greentea

97

0.0129

wholewheatpasta

95

0.0127

pancakes

80

0.0107

soup

78

0.0104

muffins

69

0.0092

energybar

67

0.0089

oliveoil

67

0.0089

champagne

64

0.0085

pepper

61

0.0081

avocado

57

0.0076

butter

52

0.0069

parmesancheese

51

0.0068

wholewheatrice

47

0.0063

lowfatyogurt

46

0.0061

chicken

44

0.0059

vegetablesmix

39

0.0052

pickles

38

0.0051

meatballs

33

0.0044

frozensmoothie

32

0.0043

yogurtcake

31

0.0041

salmon

29

0.0039

dessertwine

28

0.0037

hotdogs

28

0.0037

honey

27

0.0036

candybars

25

0.0033

cereals

25

0.0033

oil

24

0.0032

strawberries

24

0.0032

tomatosauce

24

0.0032

yams

24

0.0032

brownies

23

0.0031

cookingoil

21

0.0028

antioxydantjuice

18

0.0024

energydrink

18

0.0024

flaxseed

18

0.0024

frenchwine

18

0.0024

cottagecheese

17

0.0023

freshbread

14

0.0019

fromageblanc

14

0.0019

nonfatmilk

14

0.0019

proteinbar

14

0.0019

gums

12

0.0016

pasta

12

0.0016

tomatojuice

12

0.0016

almonds

11

0.0015

cider

11

0.0015

lightmayo

11

0.0015

soda

11

0.0015

mint

10

0.0013

rice

10

0.0013

blacktea

9

0.0012

clothesaccessories

9

0.0012

extradarkchocolate

8

0.0011

greenbeans

8

0.0011

melons

8

0.0011

sandwich

8

0.0011

spinach

8

0.0011

wholeweatflour

8

0.0011

chutney

7

0.0009

greengrapes

7

0.0009

mushroomcreamsauce

7

0.0009

salt

7

0.0009

strongcheese

7

0.0009

whitewine

7

0.0009

bacon

6

0.0008

bugspray

6

0.0008

mintgreentea

6

0.0008

petfood

6

0.0008

babiesfood

5

0.0007

chili

5

0.0007

glutenfreebar

5

0.0007

lightcream

5

0.0007

magazines

5

0.0007

blueberries

4

0.0005

mayonnaise

4

0.0005

shallot

4

0.0005

sparklingwater

4

0.0005

toothpaste

4

0.0005

asparagus

3

0.0004

barbecuesauce

3

0.0004

bramble

3

0.0004

carrots

3

0.0004

chocolatebread

3

0.0004

salad

3

0.0004

burgersauce

2

0.0003

eggplant

2

0.0003

handproteinbar

2

0.0003

mashedpotato

2

0.0003

bodyspray

1

0.0001

cauliflower

1

0.0001

corn

1

0.0001

cream

1

0.0001

ketchup

1

0.0001

oatmeal

1

0.0001

shampoo

1

0.0001

## [1] "item_2"

## 

item_2

count

pct

mineralwater

484

0.0842

spaghetti

411

0.0715

eggs

302

0.0525

groundbeef

291

0.0506

frenchfries

243

0.0423

frozenvegetables

234

0.0407

chocolate

223

0.0388

milk

209

0.0364

tomatoes

176

0.0306

shrimp

153

0.0266

herb&pepper

115

0.0200

cookies

101

0.0176

cake

99

0.0172

escalope

96

0.0167

wholewheatrice

92

0.0160

pancakes

91

0.0158

greentea

84

0.0146

oliveoil

82

0.0143

burgers

78

0.0136

champagne

78

0.0136

gratedcheese

78

0.0136

soup

76

0.0132

chicken

73

0.0127

redwine

72

0.0125

honey

69

0.0120

wholewheatpasta

68

0.0118

avocado

64

0.0111

frozensmoothie

63

0.0110

pepper

60

0.0104

lowfatyogurt

58

0.0101

ham

57

0.0099

salmon

54

0.0094

butter

53

0.0092

cookingoil

50

0.0087

energydrink

48

0.0084

vegetablesmix

45

0.0078

energybar

44

0.0077

parmesancheese

44

0.0077

pasta

40

0.0070

yogurtcake

38

0.0066

meatballs

37

0.0064

freshtuna

34

0.0059

cottagecheese

32

0.0056

muffins

32

0.0056

blacktea

31

0.0054

frenchwine

30

0.0052

lightmayo

30

0.0052

almonds

29

0.0050

tomatosauce

29

0.0050

hotdogs

28

0.0049

freshbread

25

0.0044

oil

25

0.0044

yams

25

0.0044

brownies

22

0.0038

tomatojuice

22

0.0038

mint

21

0.0037

mushroomcreamsauce

21

0.0037

strawberries

19

0.0033

cereals

18

0.0031

fromageblanc

18

0.0031

cider

17

0.0030

eggplant

17

0.0030

rice

17

0.0030

flaxseed

15

0.0026

melons

15

0.0026

bodyspray

13

0.0023

carrots

13

0.0023

greengrapes

13

0.0023

gums

13

0.0023

whitewine

13

0.0023

nonfatmilk

12

0.0021

candybars

11

0.0019

turkey

11

0.0019

antioxydantjuice

10

0.0017

proteinbar

10

0.0017

shallot

10

0.0017

spinach

10

0.0017

zucchini

10

0.0017

barbecuesauce

9

0.0016

bugspray

9

0.0016

soda

9

0.0016

bacon

8

0.0014

blueberries

8

0.0014

extradarkchocolate

8

0.0014

lightcream

8

0.0014

mintgreentea

8

0.0014

salad

8

0.0014

clothesaccessories

7

0.0012

glutenfreebar

6

0.0010

petfood

6

0.0010

pickles

6

0.0010

salt

6

0.0010

babiesfood

5

0.0009

burgersauce

5

0.0009

chutney

5

0.0009

corn

5

0.0009

greenbeans

5

0.0009

handproteinbar

5

0.0009

ketchup

5

0.0009

mayonnaise

5

0.0009

tea

5

0.0009

wholeweatflour

5

0.0009

oatmeal

4

0.0007

sparklingwater

4

0.0007

strongcheese

4

0.0007

chocolatebread

3

0.0005

dessertwine

3

0.0005

mashedpotato

3

0.0005

sandwich

3

0.0005

toothpaste

3

0.0005

asparagus

2

0.0003

magazines

2

0.0003

shampoo

2

0.0003

cauliflower

1

0.0002

chili

1

0.0002

cream

1

0.0002

waterspray

1

0.0002

## [1] "item_3"

## 

item_3

count

pct

mineralwater

375

0.0854

spaghetti

279

0.0636

eggs

225

0.0513

milk

213

0.0485

frenchfries

180

0.0410

chocolate

174

0.0396

groundbeef

141

0.0321

greentea

125

0.0285

cake

111

0.0253

pancakes

104

0.0237

oliveoil

88

0.0201

escalope

87

0.0198

frozenvegetables

87

0.0198

lowfatyogurt

87

0.0198

tomatoes

85

0.0194

soup

77

0.0175

chicken

74

0.0169

wholewheatrice

69

0.0157

honey

62

0.0141

salmon

60

0.0137

champagne

58

0.0132

frozensmoothie

58

0.0132

cookingoil

51

0.0116

pepper

51

0.0116

cookies

50

0.0114

shrimp

48

0.0109

avocado

46

0.0105

butter

45

0.0103

vegetablesmix

38

0.0087

pasta

37

0.0084

energybar

36

0.0082

almonds

35

0.0080

brownies

35

0.0080

cottagecheese

34

0.0077

energydrink

34

0.0077

parmesancheese

34

0.0077

freshbread

33

0.0075

wholewheatpasta

33

0.0075

frenchwine

32

0.0073

yogurtcake

32

0.0073

meatballs

31

0.0071

mushroomcreamsauce

31

0.0071

tomatosauce

31

0.0071

hotdogs

30

0.0068

gums

26

0.0059

lightmayo

25

0.0057

yams

24

0.0055

muffins

23

0.0052

oil

23

0.0052

gratedcheese

21

0.0048

strawberries

21

0.0048

tomatojuice

21

0.0048

cereals

20

0.0046

cider

19

0.0043

herb&pepper

19

0.0043

barbecuesauce

18

0.0041

eggplant

18

0.0041

whitewine

18

0.0041

lightcream

17

0.0039

rice

17

0.0039

blacktea

15

0.0034

fromageblanc

15

0.0034

mint

15

0.0034

shallot

15

0.0034

bodyspray

14

0.0032

carrots

14

0.0032

melons

14

0.0032

blueberries

13

0.0030

greengrapes

13

0.0030

antioxydantjuice

12

0.0027

bacon

12

0.0027

flaxseed

12

0.0027

strongcheese

12

0.0027

bugspray

11

0.0025

extradarkchocolate

11

0.0025

proteinbar

11

0.0025

redwine

11

0.0025

ham

10

0.0023

magazines

10

0.0023

spinach

10

0.0023

candybars

9

0.0021

clothesaccessories

9

0.0021

nonfatmilk

9

0.0021

toothpaste

9

0.0021

salt

8

0.0018

soda

8

0.0018

wholeweatflour

8

0.0018

mashedpotato

7

0.0016

chili

6

0.0014

chocolatebread

6

0.0014

greenbeans

6

0.0014

salad

6

0.0014

asparagus

5

0.0011

corn

5

0.0011

glutenfreebar

5

0.0011

oatmeal

5

0.0011

sparklingwater

5

0.0011

babiesfood

4

0.0009

burgersauce

4

0.0009

freshtuna

4

0.0009

mintgreentea

4

0.0009

petfood

4

0.0009

shampoo

4

0.0009

tea

4

0.0009

mayonnaise

3

0.0007

sandwich

3

0.0007

chutney

2

0.0005

dessertwine

2

0.0005

ketchup

2

0.0005

zucchini

2

0.0005

cauliflower

1

0.0002

cream

1

0.0002

handproteinbar

1

0.0002

pickles

1

0.0002

waterspray

1

0.0002

## [1] "item_4"

## 

item_4

count

pct

mineralwater

201

0.0601

eggs

181

0.0541

frenchfries

174

0.0520

spaghetti

167

0.0499

milk

149

0.0445

chocolate

143

0.0428

greentea

132

0.0395

pancakes

111

0.0332

cake

92

0.0275

oliveoil

84

0.0251

soup

72

0.0215

chicken

70

0.0209

lowfatyogurt

68

0.0203

wholewheatrice

68

0.0203

escalope

63

0.0188

frozensmoothie

62

0.0185

cookingoil

58

0.0173

groundbeef

57

0.0170

cookies

55

0.0164

honey

53

0.0158

salmon

47

0.0141

avocado

43

0.0129

champagne

41

0.0123

freshbread

41

0.0123

tomatoes

37

0.0111

butter

36

0.0108

strawberries

36

0.0108

lightmayo

30

0.0090

almonds

29

0.0087

cottagecheese

29

0.0087

frenchwine

28

0.0084

mushroomcreamsauce

27

0.0081

brownies

26

0.0078

cereals

26

0.0078

muffins

26

0.0078

yogurtcake

26

0.0078

hotdogs

25

0.0075

meatballs

24

0.0072

rice

24

0.0072

vegetablesmix

24

0.0072

energybar

23

0.0069

fromageblanc

21

0.0063

pepper

21

0.0063

proteinbar

21

0.0063

tomatojuice

20

0.0060

energydrink

18

0.0054

frozenvegetables

18

0.0054

lightcream

18

0.0054

oil

17

0.0051

carrots

15

0.0045

eggplant

15

0.0045

extradarkchocolate

14

0.0042

flaxseed

14

0.0042

bugspray

13

0.0039

cider

13

0.0039

greengrapes

13

0.0039

mint

13

0.0039

parmesancheese

13

0.0039

wholeweatflour

13

0.0039

wholewheatpasta

13

0.0039

strongcheese

12

0.0036

whitewine

12

0.0036

bacon

11

0.0033

clothesaccessories

11

0.0033

barbecuesauce

10

0.0030

glutenfreebar

10

0.0030

greenbeans

10

0.0030

magazines

10

0.0030

tomatosauce

10

0.0030

blacktea

9

0.0027

blueberries

9

0.0027

bodyspray

9

0.0027

gums

9

0.0027

nonfatmilk

9

0.0027

petfood

9

0.0027

shrimp

9

0.0027

zucchini

9

0.0027

burgersauce

8

0.0024

corn

8

0.0024

melons

8

0.0024

pasta

8

0.0024

shallot

8

0.0024

sparklingwater

8

0.0024

spinach

8

0.0024

chocolatebread

7

0.0021

yams

7

0.0021

antioxydantjuice

6

0.0018

ketchup

6

0.0018

oatmeal

6

0.0018

chili

5

0.0015

herb&pepper

5

0.0015

soda

5

0.0015

toothpaste

5

0.0015

asparagus

4

0.0012

candybars

4

0.0012

chutney

4

0.0012

handproteinbar

4

0.0012

mashedpotato

4

0.0012

mayonnaise

4

0.0012

mintgreentea

4

0.0012

salt

4

0.0012

sandwich

4

0.0012

shampoo

4

0.0012

babiesfood

3

0.0009

cauliflower

3

0.0009

redwine

3

0.0009

bramble

2

0.0006

cream

1

0.0003

gratedcheese

1

0.0003

ham

1

0.0003

napkins

1

0.0003

salad

1

0.0003

tea

1

0.0003

waterspray

1

0.0003

## [1] "item_5"

## 

item_5

count

pct

greentea

153

0.0605

eggs

134

0.0530

frenchfries

130

0.0514

chocolate

115

0.0455

milk

114

0.0451

pancakes

90

0.0356

mineralwater

84

0.0332

oliveoil

64

0.0253

cookingoil

63

0.0249

wholewheatrice

63

0.0249

lowfatyogurt

62

0.0245

spaghetti

62

0.0245

cake

60

0.0237

escalope

59

0.0233

chicken

53

0.0210

salmon

53

0.0210

frozensmoothie

49

0.0194

cookies

45

0.0178

soup

45

0.0178

honey

42

0.0166

hotdogs

35

0.0138

freshbread

34

0.0134

tomatojuice

32

0.0127

brownies

28

0.0111

cottagecheese

28

0.0111

cereals

26

0.0103

champagne

26

0.0103

frenchwine

24

0.0095

rice

24

0.0095

lightmayo

23

0.0091

oil

23

0.0091

butter

21

0.0083

carrots

21

0.0083

groundbeef

21

0.0083

lightcream

20

0.0079

energybar

19

0.0075

mushroomcreamsauce

19

0.0075

avocado

18

0.0071

fromageblanc

18

0.0071

meatballs

17

0.0067

vegetablesmix

17

0.0067

yogurtcake

17

0.0067

zucchini

17

0.0067

blacktea

16

0.0063

whitewine

16

0.0063

bodyspray

15

0.0059

muffins

15

0.0059

proteinbar

15

0.0059

strawberries

14

0.0055

magazines

13

0.0051

almonds

12

0.0047

energydrink

12

0.0047

gums

12

0.0047

mint

12

0.0047

nonfatmilk

12

0.0047

pasta

12

0.0047

wholewheatpasta

12

0.0047

barbecuesauce

11

0.0043

greengrapes

10

0.0040

tomatosauce

10

0.0040

blueberries

9

0.0036

eggplant

9

0.0036

extradarkchocolate

9

0.0036

bacon

8

0.0032

candybars

8

0.0032

clothesaccessories

8

0.0032

melons

8

0.0032

salt

8

0.0032

strongcheese

8

0.0032

cider

7

0.0028

greenbeans

7

0.0028

mintgreentea

7

0.0028

shallot

7

0.0028

spinach

7

0.0028

wholeweatflour

7

0.0028

chili

6

0.0024

parmesancheese

6

0.0024

petfood

6

0.0024

sparklingwater

6

0.0024

toothpaste

6

0.0024

bugspray

5

0.0020

flaxseed

5

0.0020

ham

5

0.0020

handproteinbar

5

0.0020

sandwich

5

0.0020

asparagus

4

0.0016

burgersauce

4

0.0016

cauliflower

4

0.0016

chocolatebread

4

0.0016

chutney

4

0.0016

corn

4

0.0016

glutenfreebar

4

0.0016

mayonnaise

4

0.0016

tea

4

0.0016

antioxydantjuice

3

0.0012

ketchup

3

0.0012

pepper

3

0.0012

yams

3

0.0012

babiesfood

2

0.0008

bramble

2

0.0008

frozenvegetables

2

0.0008

mashedpotato

2

0.0008

napkins

2

0.0008

oatmeal

2

0.0008

redwine

2

0.0008

salad

2

0.0008

shampoo

2

0.0008

soda

2

0.0008

cream

1

0.0004

tomatoes

1

0.0004

## [1] "item_6"

## 

item_6

count

pct

frenchfries

107

0.0574

eggs

102

0.0547

greentea

100

0.0536

chocolate

71

0.0381

pancakes

69

0.0370

oliveoil

63

0.0338

cake

60

0.0322

lowfatyogurt

59

0.0317

frozensmoothie

58

0.0311

chicken

51

0.0274

milk

51

0.0274

cookingoil

48

0.0258

mineralwater

44

0.0236

escalope

42

0.0225

freshbread

39

0.0209

salmon

36

0.0193

honey

35

0.0188

wholewheatrice

34

0.0182

brownies

30

0.0161

cottagecheese

30

0.0161

champagne

28

0.0150

cookies

28

0.0150

hotdogs

26

0.0139

cereals

23

0.0123

spaghetti

23

0.0123

oil

22

0.0118

yogurtcake

22

0.0118

lightmayo

20

0.0107

tomatojuice

19

0.0102

energydrink

18

0.0097

soup

18

0.0097

proteinbar

17

0.0091

whitewine

16

0.0086

extradarkchocolate

15

0.0080

strawberries

15

0.0080

blacktea

14

0.0075

carrots

14

0.0075

rice

14

0.0075

almonds

13

0.0070

eggplant

13

0.0070

frenchwine

13

0.0070

lightcream

13

0.0070

mint

13

0.0070

nonfatmilk

12

0.0064

salt

11

0.0059

barbecuesauce

10

0.0054

butter

10

0.0054

vegetablesmix

10

0.0054

avocado

9

0.0048

bodyspray

9

0.0048

cauliflower

9

0.0048

meatballs

9

0.0048

melons

9

0.0048

blueberries

8

0.0043

burgersauce

8

0.0043

fromageblanc

8

0.0043

magazines

8

0.0043

toothpaste

8

0.0043

bugspray

7

0.0038

groundbeef

7

0.0038

mushroomcreamsauce

7

0.0038

wholeweatflour

7

0.0038

antioxydantjuice

6

0.0032

babiesfood

6

0.0032

bacon

6

0.0032

energybar

6

0.0032

greenbeans

6

0.0032

mayonnaise

6

0.0032

salad

6

0.0032

soda

6

0.0032

strongcheese

6

0.0032

zucchini

6

0.0032

chili

5

0.0027

greengrapes

5

0.0027

handproteinbar

5

0.0027

ketchup

5

0.0027

oatmeal

5

0.0027

glutenfreebar

4

0.0021

mintgreentea

4

0.0021

muffins

4

0.0021

petfood

4

0.0021

shallot

4

0.0021

asparagus

3

0.0016

chocolatebread

3

0.0016

cider

3

0.0016

clothesaccessories

3

0.0016

corn

3

0.0016

ham

3

0.0016

pasta

3

0.0016

tea

3

0.0016

chutney

2

0.0011

cream

2

0.0011

gums

2

0.0011

pepper

2

0.0011

sparklingwater

2

0.0011

tomatoes

2

0.0011

tomatosauce

2

0.0011

bramble

1

0.0005

candybars

1

0.0005

flaxseed

1

0.0005

frozenvegetables

1

0.0005

mashedpotato

1

0.0005

sandwich

1

0.0005

shrimp

1

0.0005

spinach

1

0.0005

yams

1

0.0005

## [1] "item_7"

## 

item_7

count

pct

greentea

96

0.0701

frenchfries

81

0.0592

pancakes

69

0.0504

eggs

59

0.0431

lowfatyogurt

55

0.0402

frozensmoothie

41

0.0299

freshbread

38

0.0278

cookingoil

37

0.0270

chocolate

36

0.0263

chicken

35

0.0256

escalope

31

0.0226

cake

30

0.0219

brownies

28

0.0205

oliveoil

27

0.0197

wholewheatrice

27

0.0197

champagne

26

0.0190

milk

26

0.0190

lightmayo

24

0.0175

tomatojuice

24

0.0175

honey

23

0.0168

hotdogs

22

0.0161

cottagecheese

19

0.0139

salmon

19

0.0139

whitewine

19

0.0139

cookies

17

0.0124

lightcream

17

0.0124

cereals

16

0.0117

frenchwine

16

0.0117

mineralwater

14

0.0102

oil

14

0.0102

rice

13

0.0095

proteinbar

12

0.0088

energydrink

11

0.0080

soup

11

0.0080

strawberries

11

0.0080

almonds

10

0.0073

bodyspray

10

0.0073

greenbeans

10

0.0073

gums

10

0.0073

carrots

9

0.0066

eggplant

9

0.0066

yogurtcake

9

0.0066

barbecuesauce

8

0.0058

extradarkchocolate

8

0.0058

mint

8

0.0058

mushroomcreamsauce

8

0.0058

shampoo

8

0.0058

spaghetti

8

0.0058

wholeweatflour

8

0.0058

avocado

7

0.0051

blacktea

7

0.0051

blueberries

7

0.0051

melons

7

0.0051

vegetablesmix

7

0.0051

zucchini

7

0.0051

butter

6

0.0044

chili

6

0.0044

clothesaccessories

6

0.0044

handproteinbar

6

0.0044

sparklingwater

6

0.0044

bacon

5

0.0037

bugspray

5

0.0037

cider

5

0.0037

corn

5

0.0037

ham

5

0.0037

meatballs

5

0.0037

nonfatmilk

5

0.0037

salt

5

0.0037

candybars

4

0.0029

cauliflower

4

0.0029

energybar

4

0.0029

magazines

4

0.0029

mashedpotato

4

0.0029

shallot

4

0.0029

strongcheese

4

0.0029

tea

4

0.0029

toothpaste

4

0.0029

burgersauce

3

0.0022

chutney

3

0.0022

fromageblanc

3

0.0022

greengrapes

3

0.0022

ketchup

3

0.0022

petfood

3

0.0022

sandwich

3

0.0022

soda

3

0.0022

antioxydantjuice

2

0.0015

asparagus

2

0.0015

babiesfood

2

0.0015

bramble

2

0.0015

flaxseed

2

0.0015

glutenfreebar

2

0.0015

groundbeef

2

0.0015

mayonnaise

2

0.0015

muffins

2

0.0015

oatmeal

2

0.0015

salad

2

0.0015

spinach

2

0.0015

yams

2

0.0015

chocolatebread

1

0.0007

mintgreentea

1

0.0007

parmesancheese

1

0.0007

pepper

1

0.0007

## [1] "item_8"

## 

item_8

count

pct

greentea

67

0.0683

pancakes

44

0.0449

lowfatyogurt

43

0.0438

frenchfries

40

0.0408

chocolate

38

0.0387

frozensmoothie

38

0.0387

freshbread

35

0.0357

eggs

28

0.0285

escalope

28

0.0285

cake

27

0.0275

chicken

25

0.0255

honey

24

0.0245

tomatojuice

24

0.0245

cookingoil

23

0.0234

cottagecheese

22

0.0224

brownies

21

0.0214

milk

20

0.0204

cookies

15

0.0153

hotdogs

14

0.0143

lightmayo

13

0.0133

wholewheatrice

13

0.0133

cereals

12

0.0122

champagne

12

0.0122

lightcream

12

0.0122

proteinbar

12

0.0122

yogurtcake

12

0.0122

carrots

11

0.0112

energydrink

10

0.0102

greenbeans

10

0.0102

salt

10

0.0102

magazines

9

0.0092

asparagus

8

0.0082

gums

8

0.0082

mint

8

0.0082

mushroomcreamsauce

8

0.0082

oil

8

0.0082

oliveoil

8

0.0082

salmon

8

0.0082

strawberries

8

0.0082

toothpaste

8

0.0082

whitewine

8

0.0082

almonds

7

0.0071

antioxydantjuice

7

0.0071

blueberries

7

0.0071

melons

7

0.0071

rice

7

0.0071

shampoo

7

0.0071

chili

6

0.0061

glutenfreebar

6

0.0061

vegetablesmix

6

0.0061

avocado

5

0.0051

candybars

5

0.0051

eggplant

5

0.0051

mineralwater

5

0.0051

petfood

5

0.0051

zucchini

5

0.0051

bacon

4

0.0041

barbecuesauce

4

0.0041

mayonnaise

4

0.0041

salad

4

0.0041

spinach

4

0.0041

wholeweatflour

4

0.0041

bodyspray

3

0.0031

bugspray

3

0.0031

burgersauce

3

0.0031

butter

3

0.0031

chutney

3

0.0031

extradarkchocolate

3

0.0031

frenchwine

3

0.0031

handproteinbar

3

0.0031

ketchup

3

0.0031

mintgreentea

3

0.0031

pasta

3

0.0031

sandwich

3

0.0031

shallot

3

0.0031

tea

3

0.0031

blacktea

2

0.0020

bramble

2

0.0020

chocolatebread

2

0.0020

cider

2

0.0020

clothesaccessories

2

0.0020

fromageblanc

2

0.0020

greengrapes

2

0.0020

mashedpotato

2

0.0020

muffins

2

0.0020

oatmeal

2

0.0020

spaghetti

2

0.0020

sparklingwater

2

0.0020

babiesfood

1

0.0010

cauliflower

1

0.0010

corn

1

0.0010

energybar

1

0.0010

flaxseed

1

0.0010

meatballs

1

0.0010

napkins

1

0.0010

soda

1

0.0010

soup

1

0.0010

## [1] "item_9"

## 

item_9

count

pct

greentea

57

0.0872

lowfatyogurt

38

0.0581

frozensmoothie

35

0.0535

frenchfries

34

0.0520

freshbread

28

0.0428

pancakes

21

0.0321

eggs

19

0.0291

chocolate

18

0.0275

escalope

18

0.0275

tomatojuice

18

0.0275

brownies

15

0.0229

cookingoil

15

0.0229

chicken

13

0.0199

energydrink

13

0.0199

hotdogs

13

0.0199

cake

11

0.0168

mint

11

0.0168

proteinbar

11

0.0168

honey

10

0.0153

lightmayo

10

0.0153

rice

10

0.0153

zucchini

10

0.0153

cottagecheese

9

0.0138

carrots

8

0.0122

cookies

8

0.0122

oliveoil

8

0.0122

wholewheatrice

8

0.0122

almonds

7

0.0107

magazines

7

0.0107

milk

7

0.0107

oil

7

0.0107

cereals

6

0.0092

extradarkchocolate

6

0.0092

lightcream

6

0.0092

mushroomcreamsauce

6

0.0092

salmon

6

0.0092

bacon

5

0.0076

bodyspray

5

0.0076

cauliflower

5

0.0076

petfood

5

0.0076

wholeweatflour

5

0.0076

barbecuesauce

4

0.0061

burgersauce

4

0.0061

champagne

4

0.0061

mayonnaise

4

0.0061

melons

4

0.0061

mintgreentea

4

0.0061

muffins

4

0.0061

whitewine

4

0.0061

chili

3

0.0046

eggplant

3

0.0046

frenchwine

3

0.0046

glutenfreebar

3

0.0046

greenbeans

3

0.0046

mashedpotato

3

0.0046

oatmeal

3

0.0046

sandwich

3

0.0046

shampoo

3

0.0046

strongcheese

3

0.0046

yogurtcake

3

0.0046

asparagus

2

0.0031

babiesfood

2

0.0031

blueberries

2

0.0031

fromageblanc

2

0.0031

gums

2

0.0031

mineralwater

2

0.0031

sparklingwater

2

0.0031

tea

2

0.0031

toothpaste

2

0.0031

antioxydantjuice

1

0.0015

avocado

1

0.0015

blacktea

1

0.0015

bramble

1

0.0015

bugspray

1

0.0015

candybars

1

0.0015

chocolatebread

1

0.0015

chutney

1

0.0015

cider

1

0.0015

clothesaccessories

1

0.0015

corn

1

0.0015

energybar

1

0.0015

handproteinbar

1

0.0015

pasta

1

0.0015

salad

1

0.0015

soda

1

0.0015

soup

1

0.0015

strawberries

1

0.0015

vegetablesmix

1

0.0015

## [1] "item_10"

## 

item_10

count

pct

greentea

31

0.0785

frenchfries

19

0.0481

lowfatyogurt

17

0.0430

tomatojuice

17

0.0430

pancakes

14

0.0354

brownies

12

0.0304

cereals

12

0.0304

escalope

12

0.0304

frozensmoothie

12

0.0304

chocolate

11

0.0278

cottagecheese

11

0.0278

eggs

10

0.0253

hotdogs

10

0.0253

cake

9

0.0228

wholewheatrice

9

0.0228

cookingoil

8

0.0203

energydrink

7

0.0177

yogurtcake

7

0.0177

champagne

6

0.0152

mint

6

0.0152

bodyspray

5

0.0127

carrots

5

0.0127

cookies

5

0.0127

freshbread

5

0.0127

mayonnaise

5

0.0127

oil

5

0.0127

strawberries

5

0.0127

vegetablesmix

5

0.0127

whitewine

5

0.0127

bugspray

4

0.0101

cauliflower

4

0.0101

chicken

4

0.0101

clothesaccessories

4

0.0101

extradarkchocolate

4

0.0101

glutenfreebar

4

0.0101

handproteinbar

4

0.0101

honey

4

0.0101

shampoo

4

0.0101

gums

3

0.0076

lightmayo

3

0.0076

mushroomcreamsauce

3

0.0076

oatmeal

3

0.0076

proteinbar

3

0.0076

rice

3

0.0076

shallot

3

0.0076

sparklingwater

3

0.0076

toothpaste

3

0.0076

barbecuesauce

2

0.0051

blacktea

2

0.0051

eggplant

2

0.0051

energybar

2

0.0051

frenchwine

2

0.0051

magazines

2

0.0051

melons

2

0.0051

milk

2

0.0051

muffins

2

0.0051

pasta

2

0.0051

salmon

2

0.0051

salt

2

0.0051

wholeweatflour

2

0.0051

zucchini

2

0.0051

asparagus

1

0.0025

babiesfood

1

0.0025

blueberries

1

0.0025

burgersauce

1

0.0025

candybars

1

0.0025

chili

1

0.0025

chocolatebread

1

0.0025

cider

1

0.0025

fromageblanc

1

0.0025

greengrapes

1

0.0025

ketchup

1

0.0025

lightcream

1

0.0025

mashedpotato

1

0.0025

mintgreentea

1

0.0025

napkins

1

0.0025

nonfatmilk

1

0.0025

petfood

1

0.0025

soda

1

0.0025

tea

1

0.0025

## [1] "item_11"

## 

item_11

count

pct

lowfatyogurt

22

0.0859

greentea

20

0.0781

freshbread

14

0.0547

frenchfries

12

0.0469

lightmayo

9

0.0352

brownies

8

0.0312

frozensmoothie

8

0.0312

cake

7

0.0273

chicken

7

0.0273

cookingoil

7

0.0273

escalope

7

0.0273

pancakes

7

0.0273

chocolate

6

0.0234

eggs

6

0.0234

energydrink

6

0.0234

tomatojuice

6

0.0234

wholewheatrice

6

0.0234

champagne

5

0.0195

mint

5

0.0195

magazines

4

0.0156

proteinbar

4

0.0156

strawberries

4

0.0156

toothpaste

4

0.0156

yogurtcake

4

0.0156

cereals

3

0.0117

cookies

3

0.0117

hotdogs

3

0.0117

salmon

3

0.0117

babiesfood

2

0.0078

carrots

2

0.0078

cauliflower

2

0.0078

chili

2

0.0078

clothesaccessories

2

0.0078

cottagecheese

2

0.0078

eggplant

2

0.0078

extradarkchocolate

2

0.0078

gums

2

0.0078

honey

2

0.0078

ketchup

2

0.0078

melons

2

0.0078

nonfatmilk

2

0.0078

oil

2

0.0078

oliveoil

2

0.0078

salt

2

0.0078

sparklingwater

2

0.0078

whitewine

2

0.0078

asparagus

1

0.0039

barbecuesauce

1

0.0039

blacktea

1

0.0039

bodyspray

1

0.0039

bramble

1

0.0039

burgersauce

1

0.0039

candybars

1

0.0039

chocolatebread

1

0.0039

corn

1

0.0039

handproteinbar

1

0.0039

mayonnaise

1

0.0039

muffins

1

0.0039

mushroomcreamsauce

1

0.0039

rice

1

0.0039

salad

1

0.0039

sandwich

1

0.0039

spinach

1

0.0039

strongcheese

1

0.0039

tea

1

0.0039

wholeweatflour

1

0.0039

## [1] "item_12"

## 

item_12

count

pct

greentea

15

0.0974

frenchfries

10

0.0649

frozensmoothie

10

0.0649

lowfatyogurt

9

0.0584

freshbread

7

0.0455

pancakes

7

0.0455

tomatojuice

7

0.0455

honey

4

0.0260

mint

4

0.0260

cake

3

0.0195

cereals

3

0.0195

chocolate

3

0.0195

cookies

3

0.0195

cottagecheese

3

0.0195

energydrink

3

0.0195

escalope

3

0.0195

hotdogs

3

0.0195

lightmayo

3

0.0195

magazines

3

0.0195

mayonnaise

3

0.0195

melons

3

0.0195

mushroomcreamsauce

3

0.0195

proteinbar

3

0.0195

whitewine

3

0.0195

brownies

2

0.0130

candybars

2

0.0130

champagne

2

0.0130

eggs

2

0.0130

extradarkchocolate

2

0.0130

ketchup

2

0.0130

salt

2

0.0130

shampoo

2

0.0130

wholeweatflour

2

0.0130

wholewheatrice

2

0.0130

blueberries

1

0.0065

bodyspray

1

0.0065

chicken

1

0.0065

cookingoil

1

0.0065

corn

1

0.0065

eggplant

1

0.0065

glutenfreebar

1

0.0065

greengrapes

1

0.0065

ham

1

0.0065

mashedpotato

1

0.0065

rice

1

0.0065

strawberries

1

0.0065

strongcheese

1

0.0065

toothpaste

1

0.0065

vegetablesmix

1

0.0065

zucchini

1

0.0065

## [1] "item_13"

## 

item_13

count

pct

greentea

8

0.0920

freshbread

6

0.0690

lowfatyogurt

6

0.0690

escalope

4

0.0460

frenchfries

4

0.0460

tomatojuice

4

0.0460

brownies

3

0.0345

hotdogs

3

0.0345

melons

3

0.0345

salt

3

0.0345

energydrink

2

0.0230

frozensmoothie

2

0.0230

gums

2

0.0230

lightmayo

2

0.0230

mint

2

0.0230

nonfatmilk

2

0.0230

pancakes

2

0.0230

proteinbar

2

0.0230

sparklingwater

2

0.0230

yogurtcake

2

0.0230

asparagus

1

0.0115

barbecuesauce

1

0.0115

bugspray

1

0.0115

burgersauce

1

0.0115

cauliflower

1

0.0115

cereals

1

0.0115

champagne

1

0.0115

cookies

1

0.0115

cookingoil

1

0.0115

corn

1

0.0115

cottagecheese

1

0.0115

eggplant

1

0.0115

ham

1

0.0115

honey

1

0.0115

magazines

1

0.0115

mashedpotato

1

0.0115

oil

1

0.0115

salad

1

0.0115

tea

1

0.0115

toothpaste

1

0.0115

whitewine

1

0.0115

wholewheatrice

1

0.0115

zucchini

1

0.0115

## [1] "item_14"

## 

item_14

count

pct

greentea

4

0.0851

frenchfries

3

0.0638

frozensmoothie

3

0.0638

cottagecheese

2

0.0426

eggplant

2

0.0426

escalope

2

0.0426

freshbread

2

0.0426

glutenfreebar

2

0.0426

hotdogs

2

0.0426

lowfatyogurt

2

0.0426

mint

2

0.0426

oil

2

0.0426

pancakes

2

0.0426

salad

2

0.0426

toothpaste

2

0.0426

babiesfood

1

0.0213

cookies

1

0.0213

eggs

1

0.0213

handproteinbar

1

0.0213

lightmayo

1

0.0213

mineralwater

1

0.0213

muffins

1

0.0213

mushroomcreamsauce

1

0.0213

proteinbar

1

0.0213

salt

1

0.0213

strawberries

1

0.0213

tomatojuice

1

0.0213

yogurtcake

1

0.0213

## [1] "item_15"

## 

item_15

count

pct

magazines

3

0.12

freshbread

2

0.08

greentea

2

0.08

lowfatyogurt

2

0.08

pancakes

2

0.08

candybars

1

0.04

cereals

1

0.04

clothesaccessories

1

0.04

cookies

1

0.04

handproteinbar

1

0.04

hotdogs

1

0.04

mineralwater

1

0.04

mint

1

0.04

mushroomcreamsauce

1

0.04

salmon

1

0.04

tomatojuice

1

0.04

toothpaste

1

0.04

yogurtcake

1

0.04

zucchini

1

0.04

## [1] "item_16"

## 

item_16

count

pct

antioxydantjuice

1

0.125

cake

1

0.125

chocolate

1

0.125

frozensmoothie

1

0.125

magazines

1

0.125

proteinbar

1

0.125

salmon

1

0.125

sparklingwater

1

0.125

## [1] "item_17"

## 

item_17

count

pct

frozensmoothie

2

0.50

antioxydantjuice

1

0.25

frenchfries

1

0.25

## [1] "item_18"

## 

item_18

count

pct

proteinbar

2

0.50

frozensmoothie

1

0.25

spinach

1

0.25

## [1] "item_19"

## 

item_19

count

pct

cereals

1

0.3333

mayonnaise

1

0.3333

spinach

1

0.3333

## [1] "item_20"

## 

item_20

count

pct

oliveoil

1

1

## [1] "Done Processing" ## [1] "20 variables processed" ## [1] "Function Efficiency Statistics"
##    user  system elapsed 
##   11.90    0.55   12.82

Apriori

# Load the association-rule mining and visualisation packages
library(arules)
library(arulesViz)

# Recode empty-string cells as NA in the item columns
# (the data frame that excludes transaction_id)
mb_data_no_id[mb_data_no_id == ""] <- NA

# Keep a separately named copy as the input to the association-rules step
mb_data_ar <- mb_data_no_id

# Generate rules with Apriori: minimum support 0.001, minimum confidence 0.8
# NOTE(review): the input is a data frame rather than a transactions object;
# the run log reports 1268 items, so items may be position-qualified
# (column + value) -- confirm this is intended.
rules <- apriori(
  mb_data_ar,
  parameter = list(support = 0.001, confidence = 0.8)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.8    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 7 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[1268 item(s), 7501 transaction(s)] done [0.01s].
## sorting and recoding items ... [608 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [35 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
## For better comparison we sort the rules by confidence and add Bayardo's improvement.
## https://stackoverflow.com/questions/38796256/association-rule-in-r-removing-redundant-rule-arules/38809330
## https://cran.r-project.org/web/packages/arules/arules.pdf
## Bayardo's rule: The improvement of a rule is the minimum difference between its confidence and the confidence of any more general rule (i.e., a rule with the same consequent but one or more items removed in the LHS).

# Order rules from highest to lowest confidence and attach the "improvement"
# interest measure as an extra quality column.
rules <- sort(rules, by = "confidence", decreasing = TRUE)
quality(rules)$improvement <- interestMeasure(rules, measure = "improvement")
options(digits = 2)

#Remove duplicate rules
#https://stackoverflow.com/questions/47928125/removing-inverted-reverse-duplicate-rules-from-apriori-result-in-r
# Rules that share a generating itemset (same items, different LHS/RHS split)
# are treated as duplicates; because the rules are already sorted, the first
# (highest-confidence) rule of each itemset is the one that is kept.
generating_itemsets <- generatingItemsets(rules)
duplicated_generating_itemsets <- which(duplicated(generating_itemsets))

# Guard the removal: when nothing is duplicated, which() returns integer(0)
# and rules[-integer(0)] would select *no* rules instead of all of them.
if (length(duplicated_generating_itemsets) > 0) {
  rules <- rules[-duplicated_generating_itemsets]
}
#Put non-redundant rules into dataframe and display
# One row per rule: LHS label, RHS label, then every quality measure.
# quality() is the accessor already used above to add the improvement column.
df_inspect_rules <- data.frame(
  antecedent_lhs = labels(lhs(rules)),
  consequent_rhs = labels(rhs(rules)),
  quality(rules)
)

#Add rule label
# seq_len() (not seq.int()) so an empty rule set yields an empty column
# instead of c(1, 0), which would fail the assignment.
df_inspect_rules$rule <- seq_len(nrow(df_inspect_rules))

#Move rule label to front of table, then round numeric columns to 4 digits
df_inspect_rules <- df_inspect_rules %>%
  select(rule, everything()) %>%
  mutate_if(is.numeric, round, digits = 4)

#Display table
datatable(df_inspect_rules, extensions = 'FixedColumns',
          rownames = FALSE,
          class = 'cell-border stripe compact hover',
          caption = "Association Rules Results",
          options = list(pageLength = 50,
                         autoWidth = TRUE,
                         dom = 'Bfrtip',
                         scrollX = TRUE,
                         fixedColumns = list(leftColumns = 3)))
#Apriori Summary
# Prints the rule count, rule-length distribution, quality-measure summaries
# and mining info for the de-duplicated rule set.
summary(rules)
## set of 31 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2  3  4 
##  2 27  2 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2       3       3       3       3       4 
## 
## summary of quality measures:
##     support         confidence      coverage           lift         count   
##  Min.   :0.0011   Min.   :0.80   Min.   :0.0011   Min.   : 14   Min.   : 8  
##  1st Qu.:0.0011   1st Qu.:0.83   1st Qu.:0.0012   1st Qu.: 22   1st Qu.: 8  
##  Median :0.0012   Median :0.91   Median :0.0015   Median : 40   Median : 9  
##  Mean   :0.0018   Mean   :0.90   Mean   :0.0020   Mean   : 56   Mean   :13  
##  3rd Qu.:0.0016   3rd Qu.:0.97   3rd Qu.:0.0017   3rd Qu.: 78   3rd Qu.:12  
##  Max.   :0.0099   Max.   :1.00   Max.   :0.0104   Max.   :375   Max.   :74  
##   improvement     
##  Min.   :-0.0202  
##  1st Qu.:    Inf  
##  Median :    Inf  
##  Mean   :    Inf  
##  3rd Qu.:    Inf  
##  Max.   :    Inf  
## 
## mining info:
##        data ntransactions support confidence
##  mb_data_ar          7501   0.001        0.8
# Interactive scatter plot visualizations of the rules
# Reference:
#https://www.rdocumentation.org/packages/arulesViz/versions/1.3-3/topics/plotly_arules

# Marker styling and colour range shared by all three scatter plots
scatter_marker <- list(opacity = .7, size = 8, symbol = 4)
scatter_colors <- c("blue", "red")

#Confidence vs. Support by Lift
plotly_arules(rules,
              jitter = 20,
              marker = scatter_marker,
              colors = scatter_colors)

#Lift vs. Support by Lift
plotly_arules(rules,
              measure = c("support", "lift"),
              jitter = 20,
              marker = scatter_marker,
              colors = scatter_colors)

#Lift vs. Confidence by Lift
plotly_arules(rules,
              measure = c("confidence", "lift"),
              jitter = 20,
              marker = scatter_marker,
              colors = scatter_colors)
# Render the rules as an interactive graph (shaded by lift), write it out as
# a self-contained HTML page, then embed that page in the report via an iframe
widget_file <- "arules.html"

p <- plot(rules, method = "graph", engine = "htmlwidget", shading = "lift")
htmlwidgets::saveWidget(p, widget_file, selfcontained = TRUE)
#browseURL("arules.html")

htmltools::tags$iframe(
  width = 900,
  height = 600,
  title = "Association Rules",
  src = widget_file,
  scrolling = "no",
  style = "border:1px solid black;"
)

# How to read the graph (from the arulesViz manual,
# https://cran.r-project.org/web/packages/arulesViz/arulesViz.pdf):
# items are labeled vertices, and each rule (or itemset) is its own vertex
# connected to items by arrows. For a rule, arrows run from the LHS items
# into the rule vertex, and from the rule vertex out to the RHS item.

Recommendation and Summary

Business Objective

Understand a grocery store’s customer purchasing behavior through the Apriori algorithm to create Association Rules, also known as Market Basket analysis, and provide actionable recommendations.

Recommendation

The Market Basket analysis results revealed 31 rules with high confidence (> .8). Of those rules, eight are selected on a lift basis for store management to consider for placement strategy. The eight rules are formed into two groups for this analysis: “Budget Dinners” and “Fancy Dinners”.

There are many potential uses for these results. The rules identify which items should be placed near each other on a shelf to tempt likely buyers into purchasing additional items. Discounting tactics could be used on items identified in the rules. Buyers who purchase one of the rule items could be targeted with advertisements for the other items contained in the rule.

Results Summary

The two highest-impact Association Rules groups selected cluster around an approximate lift value of 100. A lift of 100 means a customer is about 100 times more likely to buy the consequent (the “then” item) when the antecedent (the “if” items) is present than a customer is in general.

The Budget Dinners show shoppers are more likely to buy spaghetti at varying lift degrees if mineral water, milk, frozen vegetables, ground beef and burgers are present in the transaction. The Fancy Dinners show transactions grouping on three consequent items: escalope, frozen vegetables and mineral water. The antecedent items associated with the Fancy Dinners group are pasta, mushroom cream sauce, french fries, shrimp, tomatoes, spaghetti and chocolate.

The confidence in the Fancy Dinners group, for example, shows that 100% of the transactions containing pasta and mushroom cream sauce also contain escalope (a confidence of 1.0). This conclusion may change in a dataset with higher support for this specific rule.

The rule with the highest support, ‘If burgers, then turkey’, indicates the store sells burgers and turkey together at a high frequency (the rule has a support of .0099, i.e. it appears in about 0.99% of all transactions, 74 instances).

  • Rules Group Description: Fancy Dinners
  • Rules Group characteristics: High lift, higher confidence
    • Rule 2: If pasta and mushroom cream sauce, then escalope.
    • Rule 3: If french fries and pasta, then escalope.
    • Rule 12: If shrimp and tomatoes, then frozen vegetables.
    • Rule 17: If spaghetti and chocolate, then mineral water.
  • Rules Group Description: Budget Dinners
  • Rules Group Characteristics: High lift, lower confidence
    • Rule 19: If mineral water and milk, then spaghetti.
    • Rule 23: If frozen vegetables and mineral water, then spaghetti.
    • Rule 26: If ground beef and mineral water, then spaghetti.
    • Rule 29: If burgers and mineral water, then spaghetti.
  • Rules with highest support (rule instances)
    • Rule 9: If burgers, then turkey (74).
    • Rule 28: If ground beef and mineral water, then spaghetti (33, 27)
    • Rule 10: If shrimp and tomatoes, then frozen vegetables (18).

Association Rules

Apriori Discussion

The parameters set for the Apriori algorithm return rules with support of at least .001 and confidence of at least .8.

  • Of the 31 non-redundant rules generated, 27 rules contained 3 items, 2 rules contained 2 items, and 2 rules contained 4 items.
  • The average support for non-redundant rules is .0018.
  • The average confidence for non-redundant rules is .9.
  • The average lift is 56, with a minimum of 14 and a maximum of 375.
  • The average count of rule instances is 13, with a minimum of 8 and a maximum of 74.

The evaluation measure for recommendation is lift. Confidence is used as a secondary measure followed by support. This results in the identification of three separate rule groups. The first rule, ‘If olive oil then milk’, has the highest lift (375) but is discarded from recommendation due to a relative lack of support. It is recommended to revisit this conclusion in a future analysis run against more transactions. The other two rule groups are identified near lift values of 100. The primary separator between the Budget Dinners and the Fancy Dinners groups is confidence. The Fancy Dinners display higher confidence (>.9) compared to the Budget Dinners (<.9).

Lift vs. Confidence by Lift

Interactive Association Rules

This interactive chart represents the association rules as a graph with items as labeled vertices, and rules represented as vertices connected to items using arrows. For rules, the antecedent items are connected with arrows pointing to the vertex representing the rule and the consequent has an arrow pointing to the item.

About the Dataset

  • Detailed frequency statistics of all items sold and item purchase position are contained on the ‘Data Summary’ tab.
  • There are 7,501 transactions containing 29,363 items sold.
  • Mineral water is the top selling item, followed by eggs and spaghetti.
Sales Rank Item Count Pct of Items Sold
1 mineralwater 1788 .0609
2 eggs 1348 .0459
3 spaghetti 1306 .0445
4 frenchfries 1282 .0437
5 chocolate 1230 .0419
6 greentea 991 .0337
7 milk 972 .0331
8 groundbeef 737 .0251
9 frozenvegetables 715 .0244
10 pancakes 713 .0243
All Items All Items 29,363 1

Data Source and Background

Additional Citations

#Time Script
# Report total run time since start.time (set at the top of the script).
# The units are pinned to seconds: plain `end.time - start.time` lets R pick
# secs/mins/hours automatically, and paste0() drops the unit, which would
# make the printed number ambiguous for longer runs.

end.time <- Sys.time()
elapsed.time <- round(difftime(end.time, start.time, units = "secs"), 3)

# Value printed is always in seconds
paste0("Elapsed Time is : ", elapsed.time )
## [1] "Elapsed Time is : 24.649"