Advent of Code 2020-07 with R & Neo4j

4 minute(s) read

Solving Advent of Code 2020-07 with R & Neo4j.

[Disclaimer] Obviously, this post contains a big spoiler about Advent of Code.

Instructions

Find the complete instructions at: https://adventofcode.com/2020/day/7.

R & Neo4j solution

As today’s puzzle was a graph challenge, I decided to play a little bit with R and Neo4j.

Part one

library(tidyverse)
## ── Attaching packages ───────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.0     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   1.0.2
## ✓ tidyr   1.0.3     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0

## ── Conflicts ──────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the puzzle input (the file has no header row) and wrap it as a tibble
input <- as_tibble(
  read.delim(
    file = "2020-07-aoc.txt",
    header = FALSE,
    stringsAsFactors = FALSE
  )
)


# Turn every rule line into one row per (outer bag, inner bag) pair:
#   origin    - colour of the outer bag (text before " bags ")
#   contain   - colour of one inner bag, or "no other" when the rule is empty
#   n_contain - required quantity, kept as character ("0" for "no other")
clean <- input %>%
  # Extracting the origin and the comma-separated list of contents
  mutate(
    origin = gsub("(.*) bags .*", "\\1", V1),
    contain = gsub(".* contain (.*) bags*\\.", "\\1", V1),
    contain = strsplit(contain, ",", fixed = TRUE)
  ) %>%
  unnest(contain) %>%
  # Extracting what they can contain
  mutate(
    # Capture the whole first run of digits. A leading greedy `.*` would
    # swallow all but the last digit, turning "12" into "2".
    # Entries without digits (i.e. "no other") are left untouched here.
    n_contain = sub("^[^0-9]*([0-9]+).*$", "\\1", contain),
    n_contain = if_else(n_contain == "no other", "0", n_contain),
    # Strip the "bag(s)" suffix, the quantity and surrounding whitespace so
    # that only the colour name remains
    contain = gsub(" bags*", "", contain),
    contain = gsub("[0-9]*", "", contain),
    contain = stringr::str_trim(contain)
  ) %>%
  # Keeping only the needed elements
  select(
    origin, contain, n_contain
  )
clean
## # A tibble: 1,395 x 3
##    origin         contain         n_contain
##    <chr>          <chr>           <chr>
##  1 clear maroon   dull lavender   1
##  2 wavy turquoise vibrant magenta 4
##  3 wavy turquoise light violet    4
##  4 wavy turquoise bright gold     5
##  5 wavy turquoise faded black     2
##  6 wavy beige     plaid magenta   3
##  7 wavy beige     wavy lime       3
##  8 wavy beige     clear turquoise 2
##  9 wavy beige     muted cyan      3
## 10 mirrored black plaid red       1
## # … with 1,385 more rows
# Launching the container
#
# The current working directory is mounted as Neo4j's import directory so the
# CSV files written by this script can be read with LOAD CSV. The mount
# argument is shell-quoted so that a working directory containing spaces or
# other shell metacharacters does not break the command.
system(paste(
  "docker run --rm --name neo4j",
  "--env NEO4J_AUTH=neo4j/password",
  "--publish=7687:7687 --publish=7474:7474",
  "-v", shQuote(paste0(getwd(), ":/var/lib/neo4j/import")),
  "-d neo4j:3.4"
))

library(neo4r)
# Connecting to the Neo4j API
con <- neo4j_api$new(
  url = "http://localhost:7474",
  user = "neo4j",
  password = "password"
)

# Waiting for the container to be ready
# A failed ping (e.g. connection refused while Neo4j is still booting) is
# turned into NA instead of comparing an error object with 200, and the wait
# is bounded so a container that never starts does not hang the script.
neo4j_is_up <- function() {
  status <- tryCatch(con$ping(), error = function(e) NA_integer_)
  isTRUE(status == 200)
}

max_attempts <- 30L
attempt <- 0L
Sys.sleep(10)
while (!neo4j_is_up()) {
  attempt <- attempt + 1L
  if (attempt >= max_attempts) {
    stop("Neo4j did not answer on http://localhost:7474 in time.", call. = FALSE)
  }
  Sys.sleep(10)
}
# Writing CSV for Neo4J
# Nodes: every distinct colour seen either as an outer or as an inner bag.
# `unique(x, y)` would treat `y` as the `incomparables` argument rather than
# as more values, so the two columns are combined with `union()` instead.
# The "no other" placeholder is not a real bag and is left out of the nodes.
data.frame(
  nodes = setdiff(union(clean$origin, clean$contain), "no other")
) %>%
  write_csv("aoc07_nodes.csv")
# Relationships: one row per (origin, contain, n_contain) triple
clean %>%
  write_csv("aoc07_relationships.csv")
# Adding constraint of uniqueness on names
# The nodes of this graph are created and matched with the `bag` label, so the
# constraint has to target `bag` to actually apply to them.
"CREATE CONSTRAINT ON (b:bag) ASSERT b.name IS UNIQUE;" %>%
  call_neo4j(con, include_stats = TRUE)
## No data returned.

## # A tibble: 12 x 2
##    type                  value
##    <chr>                 <dbl>
##  1 contains_updates          1
##  2 nodes_created             0
##  3 nodes_deleted             0
##  4 properties_set            0
##  5 relationships_created     0
##  6 relationship_deleted      0
##  7 labels_added              0
##  8 labels_removed            0
##  9 indexes_added             0
## 10 indexes_removed           0
## 11 constraints_added         1
## 12 constraints_removed       0
# Uniqueness constraint on the `name` property of nodes labelled `contain`
# NOTE(review): the load statements in this script only use the `bag` label,
# so this constraint does not appear to cover any node - confirm it is needed.
call_neo4j(
  "CREATE CONSTRAINT ON (contain:contain) ASSERT contain.name IS UNIQUE;",
  con,
  include_stats = TRUE
)
## No data returned.

## # A tibble: 12 x 2
##    type                  value
##    <chr>                 <dbl>
##  1 contains_updates          1
##  2 nodes_created             0
##  3 nodes_deleted             0
##  4 properties_set            0
##  5 relationships_created     0
##  6 relationship_deleted      0
##  7 labels_added              0
##  8 labels_removed            0
##  9 indexes_added             0
## 10 indexes_removed           0
## 11 constraints_added         1
## 12 constraints_removed       0
# Create one `bag` node per row of the nodes CSV (MERGE skips existing names)
call_neo4j(
  'LOAD CSV WITH HEADERS FROM "file:///aoc07_nodes.csv" AS csvLine
MERGE (:bag {name : csvLine.nodes} ) ',
  con,
  include_stats = TRUE
)
## No data returned.

## # A tibble: 12 x 2
##    type                  value
##    <chr>                 <dbl>
##  1 contains_updates          1
##  2 nodes_created           594
##  3 nodes_deleted             0
##  4 properties_set          594
##  5 relationships_created     0
##  6 relationship_deleted      0
##  7 labels_added            594
##  8 labels_removed            0
##  9 indexes_added             0
## 10 indexes_removed           0
## 11 constraints_added         0
## 12 constraints_removed       0
# Load the relationships: for each CSV row whose `origin` and `contain` both
# match an existing `bag` node, MERGE a CAN_CONTAIN edge carrying `n_contain`
call_neo4j(
  'LOAD CSV WITH HEADERS FROM "file:///aoc07_relationships.csv" AS csvLine
MATCH (c:bag {name : csvLine.contain})
MATCH (o:bag {name : csvLine.origin})
MERGE (o) -[:CAN_CONTAIN {n_contain: csvLine.n_contain}]->(c);',
  con,
  include_stats = TRUE
)
## No data returned.

## # A tibble: 12 x 2
##    type                  value
##    <chr>                 <dbl>
##  1 contains_updates          1
##  2 nodes_created             0
##  3 nodes_deleted             0
##  4 properties_set         1385
##  5 relationships_created  1385
##  6 relationship_deleted      0
##  7 labels_added              0
##  8 labels_removed            0
##  9 indexes_added             0
## 10 indexes_removed           0
## 11 constraints_added         0
## 12 constraints_removed       0
# Count the distinct bags that can eventually contain a shiny gold one,
# i.e. that reach it through one or more CAN_CONTAIN edges
call_neo4j(
  'MATCH (b:bag)-[:CAN_CONTAIN*1..]-> (:bag {name:"shiny gold"}) RETURN count( DISTINCT b) AS count',
  con
)
## $count
## # A tibble: 1 x 1
##   value
##   <int>
## 1   126
##
## attr(,"class")
## [1] "neo"  "list"

Part two

# For every containment path starting at shiny gold, multiply the quantities
# along the path, then sum those products over all paths
call_neo4j(
  'MATCH (:bag {name:"shiny gold"})-[r:CAN_CONTAIN*1..]-> (:bag) WITH reduce(tot = 1, i IN r| tot * toInteger(i.n_contain)) AS total RETURN sum(total) AS total',
  con
)
## $total
## # A tibble: 1 x 1
##    value
##    <int>
## 1 220149
##
## attr(,"class")
## [1] "neo"  "list"
# Stop the Neo4j container, then delete the CSV files written for the import
system("docker kill neo4j")

unlink(c("aoc07_nodes.csv", "aoc07_relationships.csv"))

What do you think?