# String manipulation walkthrough: concatenation, length, case conversion,
# substrings, trimming/padding, wrapping, glue formatting, glue_sql queries,
# and regular expressions with grep().

# Load required packages from local library into R session.

library(dplyr)       # Wrangling: mutate().
library(stringi)     # The string concat operator %s+%.
library(stringr)     # String manipulation.
library(glue)        # Format strings.
library(magrittr)    # Pipelines for data processing: %>% %T>% %<>%.
library(rattle.data) # Weather dataset.
library(scales)      # comma(), percent().

# Concatenation ----

# The %s+% operator is vectorised over both operands.
"abc" %s+% "def" %s+% "ghi"
c("abc", "def", "ghi", "jkl") %s+% c("mno")
c("abc", "def", "ghi", "jkl") %s+% c("mno", "pqr")
c("abc", "def", "ghi", "jkl") %s+% c("mno", "pqr", "stu", "vwx")

# The same task with stringr, glue and base R.
str_c("hello", "world")
str_c("hello", "world", sep = " ")
glue("hello", "world")
cat("hello", "world")
cat("hello", 123, "world")
paste("hello", "world")

# Compare how each approach treats NULL.
"hello" %s+% NULL %s+% "world"
str_c("hello", NULL, "world")
glue("hello", NULL, "world")
cat("hello", NULL, "world")
paste("hello", NULL, "world")

# Compare how each approach treats NA.
"hello" %s+% NA %s+% "world"
str_c("hello", NA, "world")
glue("hello", NA, "world")
cat("hello", NA, "world")
paste("hello", NA, "world")

# String length ----

str_length("hello world")
str_length(c("hello", "world"))
str_length(NULL)
str_length(NA)

nchar("hello world")
nchar(c("hello", "world"))
nchar(NULL)
nchar(NA)

# Case conversion ----

toupper("String Manipulation")
tolower("String Manipulation")
casefold("String Manipulation")
casefold("String Manipulation", upper = TRUE)

# Substrings ----

# stringr: negative positions count back from the end of the string.
s <- "string manipulation"
str_sub(s, start = 3, end = 6)
str_sub(s, 3, 6)
str_sub(s, 1, -8)
str_sub(s, 1, -8) <- "stip"
s

v <- c("string", "manipulation", "always", "fascinating")
str_sub(v, -4, -1)
str_sub(v, -4, -1) <- "RING"
v

# Base R: substr().
s <- "string manipulation"
substr(s, start = 3, stop = 6)
substr(s, 3, 6)
substr(s, 1, 12) <- "stip"
s

# Base R: substring().
s <- "string manipulation"
substring(s, first = 3, last = 6)

x <- c("abcd", "aabcb", "babcc", "cabcd")
substring(x, 2, 4)
substring(x, 2, 4) <- "AB"
x

# Trimming and padding ----

ws <- c(" abc", "def ", " ghi ")
str_trim(ws)
str_trim(ws, side = "left")
str_trim(ws, side = "right")
str_trim(ws, side = "both")

str_pad("abc", width = 7)
str_pad("abc", width = 7, side = "left")
str_pad("abc", width = 7, side = "right")
str_pad("abc", width = 7, side = "both", pad = "#")

# Wrapping and word extraction ----

st <- "All the Worlds a stage, All men are merely players"
cat(str_wrap(st, width = 25))

st <- c("The quick brown fox", "jumps on the brown dog")
word(st, start = 1, end = 2)
word(st, start = 1, end = -2)

# Formatting strings with glue ----

dsname <- "weatherAUS"
nobs <- nrow(weatherAUS)
starts <- min(weatherAUS$Date)

# Multiple arguments are joined into one string before interpolation.
glue("The {dsname} dataset",
     " has just less than {comma(nobs + 1)} observations,",
     " starting from {format(starts, '%-d %B %Y')}.")

# A single multi-line template. NOTE(review): the line breaks inside the
# next two templates were reconstructed; confirm against the intended layout.
glue("
  The {dsname} dataset
  has just less than {comma(nobs + 1)} observations
  starting from {format(starts, '%-d %B %Y')}.
  ")

# Named arguments supply the template values for this call only.
glue("
  The {dsname} dataset
  has just less than {comma(nobs + 1)} observations
  starting from {format(starts, '%-d %B %Y')}.
  ",
  dsname = "weather",
  nobs = nrow(weather),
  starts = min(weather$Date))

# glue_data() evaluates the template against the columns of the piped data.
weatherAUS %>%
  sample_n(6) %>%
  glue_data("Observation",
            " {rownames(.) %>% as.integer() %>% comma() %>% sprintf('%7s', .)}",
            " location {Location %>% sprintf('%-14s', .)}",
            " max temp {MaxTemp %>% sprintf('%5.1f', .)}")

weatherAUS %>%
  sample_n(6) %>%
  mutate(TempRange = glue("{MinTemp}-{MaxTemp}")) %>%
  glue_data("Observed temperature range at {Location} of {TempRange}")

# A trailing double backslash joins a template line to the next one.
glue("
  A formatted string \\
  can also be on a \\
  single line
  ")

# Doubled braces produce literal braces.
name <- "Fred"
glue("My name is {name}, not {{name}}.")

# Custom delimiters leave the LaTeX braces alone; <<one>> interpolates `one`.
one <- "1"
glue("The value of $e^{2\\pi i}$ is $<<one>>$.", .open = "<<", .close = ">>")

# Braces inside comments, quotes and backticks within an expression.
`foo}\`` <- "foo"
glue("{
      {
        '}\\'' # { and } in comments, single quotes
        \"}\\\"\" # or double quotes are ignored
        `foo}\\`` # as are { in backticks
      }
  }")

# Building SQL with glue_sql ----

con <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
colnames(iris) <- gsub("[.]", "_", tolower(colnames(iris)))
DBI::dbWriteTable(con, "iris", iris)

var <- "sepal_width"
tbl <- "iris"
num <- 2
val <- "setosa"

# Backticked names are quoted as identifiers; plain names as values.
glue_sql("
  SELECT {`var`}
  FROM {`tbl`}
  WHERE {`tbl`}.sepal_length > {num}
    AND {`tbl`}.species = {val}
  ", .con = con)

# A `?` placeholder is left in the statement and bound with dbBind().
sql <- glue_sql("
  SELECT {`var`}
  FROM {`tbl`}
  WHERE {`tbl`}.sepal_length > ?
  ", .con = con)
query <- DBI::dbSendQuery(con, sql)
DBI::dbBind(query, list(num))
DBI::dbFetch(query, n = 4)
DBI::dbClearResult(query)

# `glue_sql()` can be used to build up more complex queries with
# interchangeable sub queries. It returns `DBI::SQL()` objects which are
# properly protected from quoting.
sub_query <- glue_sql("
  SELECT *
  FROM {`tbl`}
  ", .con = con)

glue_sql("
  SELECT s.{`var`}
  FROM ({sub_query}) AS s
  ", .con = con)
#> SELECT s.`sepal_width`
#> FROM (SELECT *
#> FROM `iris`) AS s

# If you want to input multiple values for use in SQL IN statements put `*`
# at the end of the value and the values will be collapsed and quoted
# appropriately.
glue_sql("SELECT * FROM {`tbl`} WHERE sepal_length IN ({vals*})",
         vals = 1, .con = con)
#> SELECT * FROM `iris` WHERE sepal_length IN (1)

glue_sql("SELECT * FROM {`tbl`} WHERE sepal_length IN ({vals*})",
         vals = 1:5, .con = con)
#> SELECT * FROM `iris` WHERE sepal_length IN (1, 2, 3, 4, 5)

glue_sql("SELECT * FROM {`tbl`} WHERE species IN ({vals*})",
         vals = "setosa", .con = con)
#> SELECT * FROM `iris` WHERE species IN ('setosa')

glue_sql("SELECT * FROM {`tbl`} WHERE species IN ({vals*})",
         vals = c("setosa", "versicolor"), .con = con)
#> SELECT * FROM `iris` WHERE species IN ('setosa', 'versicolor')

# Release the in-memory database now that the last query has been built.
DBI::dbDisconnect(con)

# Regular expressions ----

# NOTE(review): read_csv(), read_tsv() and xtable() are not provided by any
# package attached above; presumably readr and xtable are loaded elsewhere.
"tables/metacharacters.csv" %>%
  read_csv() %>%
  xtable()

s <- c("hands", "data", "on", "data$cience", "handsondata$cience", "handson")
grep(pattern = "^data", s, value = TRUE)
grep(pattern = "on$", s, value = TRUE)
grep(pattern = "(nd)..(nd)", s, value = TRUE)
grep(pattern = "\\$", s, value = TRUE)

"tables/quantifier.tsv" %>%
  read_tsv() %>%
  xtable()

s <- c("aaab", "abb", "bc", "abbcd", "bbbc", "abab", "caa")
grep(pattern = "ab*b", s, value = TRUE)
grep(pattern = "abbc?", s, value = TRUE)
grep(pattern = "b{2,}?", s, value = TRUE)

"tables/characterclass.tsv" %>%
  read_tsv() %>%
  xtable()

s <- c("abc12", "@#$", "345", "ABcd")
grep(pattern = "[0-9]+", s, value = TRUE)
grep(pattern = "[A-Z]+", s, value = TRUE)
grep(pattern = "[^@#$]+", s, value = TRUE)
grep(pattern = "[[:alpha:]]", s, value = TRUE)
grep(pattern = "[[:upper:]]", s, value = TRUE)

# Random text ----

stri_rand_lipsum(20)
stri_rand_lipsum(2)

# nchar() is vectorised, so no sapply() is needed. Run twice because the
# generated paragraphs differ between calls.
nchar(stri_rand_lipsum(10))
nchar(stri_rand_lipsum(10))

# Searching lines of a file ----

dsname <- "weather" # Dataset name.
ftype <- "csv"      # Source dataset file type.

# Build the file name, print it as a side effect (%T>%), then read the
# file from the rattle package's `csv` directory.
ds <- dsname %s+% "." %s+% ftype %T>%
  print() %>%
  system.file(ftype, ., package = "rattle") %>%
  readLines()

head(ds)

grep("ENE", ds)
grep("ENE", ds, value = TRUE)