Advent of Code 2020-07 with R & Neo4j
Solving Advent of Code 2020-07 with R & Neo4j.
[Disclaimer] Obviously, this post contains a big spoiler about Advent of Code.
Instructions
Find the complete instructions at: https://adventofcode.com/2020/day/7.
R & Neo4j solution
As today’s puzzle was a graph challenge, I decided to play a little with R and Neo4j.
Part one
library(tidyverse)
## ── Attaching packages ───────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.0 ✓ purrr 0.3.4
## ✓ tibble 3.0.1 ✓ dplyr 1.0.2
## ✓ tidyr 1.0.3 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the puzzle input without a header row and convert it to a tibble.
# The text of each line is later read from column V1.
input <- as_tibble(
  read.delim(
    "2020-07-aoc.txt",
    header = FALSE,
    stringsAsFactors = FALSE
  )
)
# Turn every rule line into one row per (outer bag, inner bag) pair.
# Result columns (all character):
#   origin    - colour of the containing bag
#   contain   - colour of the contained bag ("no other" for empty bags)
#   n_contain - how many of `contain` fit directly in `origin`
clean <- input %>%
  mutate(
    # Colour of the outer bag: the text preceding " bags "
    origin = gsub("(.*) bags .*", "\\1", V1),
    # Everything between " contain " and the final " bag(s)."
    contain = gsub(".* contain (.*) bags*\\.", "\\1", V1),
    # One list element per comma-separated inner bag
    contain = strsplit(contain, ",")
  ) %>%
  unnest(contain) %>%
  mutate(
    # Take the whole run of digits as the quantity. A greedy ".*" prefix in
    # front of the capture group would keep only the last digit of a
    # multi-digit number ("12" -> "2").
    n_contain = stringr::str_extract(contain, "[0-9]+"),
    # Entries without any number (the "no other" rule) count as zero
    n_contain = coalesce(n_contain, "0"),
    # Strip the "bag"/"bags" suffix, the quantity and surrounding blanks so
    # only the colour name remains
    contain = gsub(" bags*", "", contain),
    contain = gsub("[0-9]+", "", contain),
    contain = stringr::str_trim(contain)
  ) %>%
  # Keeping only the needed elements
  select(origin, contain, n_contain)
clean
## # A tibble: 1,395 x 3
## origin contain n_contain
## <chr> <chr> <chr>
## 1 clear maroon dull lavender 1
## 2 wavy turquoise vibrant magenta 4
## 3 wavy turquoise light violet 4
## 4 wavy turquoise bright gold 5
## 5 wavy turquoise faded black 2
## 6 wavy beige plaid magenta 3
## 7 wavy beige wavy lime 3
## 8 wavy beige clear turquoise 2
## 9 wavy beige muted cyan 3
## 10 mirrored black plaid red 1
## # … with 1,385 more rows
# Start a detached (-d), self-removing (--rm) Neo4j 3.4 container named
# "neo4j". Ports 7474 and 7687 are published, the credentials are set to
# neo4j/password, and the current directory is mounted as the Neo4j import
# folder so that LOAD CSV can read files written from R.
system(
  command = "docker run --rm --name neo4j --env NEO4J_AUTH=neo4j/password --publish=7687:7687 --publish=7474:7474 -v $(pwd):/var/lib/neo4j/import -d neo4j:3.4"
)
library(neo4r)
# Connection object for the Neo4j HTTP endpoint published on port 7474,
# using the same credentials that were passed to the container.
con <- neo4j_api$new(
  password = "password",
  user = "neo4j",
  url = "http://localhost:7474"
)
# Waiting for the container to be ready
# Poll the server every 10 seconds until it answers with status 200. The
# number of attempts is capped so that a container which never comes up
# cannot hang the script forever.
max_attempts <- 30
neo4j_ready <- FALSE
for (attempt in seq_len(max_attempts)) {
  Sys.sleep(10)
  # An error raised by con$ping() (e.g. server not reachable yet) counts as
  # "not ready" instead of aborting the script.
  status <- tryCatch(con$ping(), error = function(e) NA)
  # isTRUE() also guards against NA or zero-length results
  if (isTRUE(status == 200)) {
    neo4j_ready <- TRUE
    break
  }
}
if (!neo4j_ready) {
  stop(
    "Neo4j did not answer within ", max_attempts * 10, " seconds.",
    call. = FALSE
  )
}
# Writing CSV for Neo4J
# One row per distinct bag colour, whether it appears as a container or as
# content. union() is required here: the second argument of unique() is
# `incomparables`, not a second vector to merge. The "no other" placeholder
# is not a real bag, so it gets no node.
bag_names <- setdiff(union(clean$origin, clean$contain), "no other")
data.frame(
  nodes = bag_names
) %>%
  write_csv("aoc07_nodes.csv")
# Export every (origin, contain, n_contain) row; the file is read back by
# the relationship LOAD CSV query.
write_csv(clean, "aoc07_relationships.csv")
# Adding constraint of uniqueness on names
# Every node is created and matched with the `bag` label, so the constraint
# has to target that label to take effect on the loaded data.
"CREATE CONSTRAINT ON (b:bag) ASSERT b.name IS UNIQUE;" %>%
  call_neo4j(con, include_stats = TRUE)
## No data returned.
## # A tibble: 12 x 2
## type value
## <chr> <dbl>
## 1 contains_updates 1
## 2 nodes_created 0
## 3 nodes_deleted 0
## 4 properties_set 0
## 5 relationships_created 0
## 6 relationship_deleted 0
## 7 labels_added 0
## 8 labels_removed 0
## 9 indexes_added 0
## 10 indexes_removed 0
## 11 constraints_added 1
## 12 constraints_removed 0
"CREATE CONSTRAINT ON (contain:contain) ASSERT contain.name IS UNIQUE;" %>%
call_neo4j(con, include_stats = TRUE)
## No data returned.
## # A tibble: 12 x 2
## type value
## <chr> <dbl>
## 1 contains_updates 1
## 2 nodes_created 0
## 3 nodes_deleted 0
## 4 properties_set 0
## 5 relationships_created 0
## 6 relationship_deleted 0
## 7 labels_added 0
## 8 labels_removed 0
## 9 indexes_added 0
## 10 indexes_removed 0
## 11 constraints_added 1
## 12 constraints_removed 0
# Load the nodes
# MERGE creates one `bag` node per distinct value of the `nodes` column,
# so repeated names in the CSV do not produce duplicate nodes.
call_neo4j(
  'LOAD CSV WITH HEADERS FROM "file:///aoc07_nodes.csv" AS csvLine
MERGE (:bag {name : csvLine.nodes} ) ',
  con,
  include_stats = TRUE
)
## No data returned.
## # A tibble: 12 x 2
## type value
## <chr> <dbl>
## 1 contains_updates 1
## 2 nodes_created 594
## 3 nodes_deleted 0
## 4 properties_set 594
## 5 relationships_created 0
## 6 relationship_deleted 0
## 7 labels_added 594
## 8 labels_removed 0
## 9 indexes_added 0
## 10 indexes_removed 0
## 11 constraints_added 0
## 12 constraints_removed 0
#
# Load the relationships
# For each CSV row, look up both existing `bag` nodes and link them with a
# CAN_CONTAIN relationship carrying the quantity. Rows whose endpoints have
# no matching node are skipped by the MATCH clauses.
call_neo4j(
  'LOAD CSV WITH HEADERS FROM "file:///aoc07_relationships.csv" AS csvLine
MATCH (c:bag {name : csvLine.contain})
MATCH (o:bag {name : csvLine.origin})
MERGE (o) -[:CAN_CONTAIN {n_contain: csvLine.n_contain}]->(c);',
  con,
  include_stats = TRUE
)
## No data returned.
## # A tibble: 12 x 2
## type value
## <chr> <dbl>
## 1 contains_updates 1
## 2 nodes_created 0
## 3 nodes_deleted 0
## 4 properties_set 1385
## 5 relationships_created 1385
## 6 relationship_deleted 0
## 7 labels_added 0
## 8 labels_removed 0
## 9 indexes_added 0
## 10 indexes_removed 0
## 11 constraints_added 0
## 12 constraints_removed 0
# Count the distinct bags that can eventually contain a shiny gold bag,
# i.e. every bag with a CAN_CONTAIN path of length >= 1 leading to it.
call_neo4j(
  'MATCH (b:bag)-[:CAN_CONTAIN*1..]-> (:bag {name:"shiny gold"}) RETURN count( DISTINCT b) AS count',
  con
)
## $count
## # A tibble: 1 x 1
## value
## <int>
## 1 126
##
## attr(,"class")
## [1] "neo" "list"
Part two
# Total number of bags inside a shiny gold bag: for every CAN_CONTAIN path
# starting at "shiny gold", multiply the quantities along the path, then
# sum those products over all paths.
call_neo4j(
  'MATCH (:bag {name:"shiny gold"})-[r:CAN_CONTAIN*1..]-> (:bag) WITH reduce(tot = 1, i IN r| tot * toInteger(i.n_contain)) AS total RETURN sum(total) AS total',
  con
)
## $total
## # A tibble: 1 x 1
## value
## <int>
## 1 220149
##
## attr(,"class")
## [1] "neo" "list"
# Stop the container; it was started with --rm, so killing it also
# removes it.
system("docker kill neo4j")
# Delete the temporary CSV files written for the import.
unlink(c("aoc07_nodes.csv", "aoc07_relationships.csv"))
What do you think?