# Chapter 13 help menu.
#
# Typing `.c13` at the console (or calling `.c13()` / `.c13(0)`) prints this
# function; the string literal at the top of its body is the table of
# contents. `.c13(n)` prints explanation number n via `.zchapter13()`.
# The string is deliberately kept inside the function body so that printing
# the function shows the menu.
#
# i: topic number; 0 (the default) shows the list.
.chapter13 <- function(i = 0) {
"
 i  Chapter 13: Simple string manipulation
 -  -------------------------------------
 1  Why we should care about string manipulation?
 2  Assigning a string variable
 3  is.character() and typeof() functions
 4  toupper(), and tolower()
 5  nchar() fuction
 6  substr() function
 7  paste() function
 8  paste0()
 9  as.character(), toString()
10  sub() and gsub() functions (substitution, global substitution)
11  date() function and nchar()
12  as.numeric()
13  date() function and typeof()
14  Repeating indicator, *, +, ? and . (dot)
15  logic or
16  logic and
17  logic not ^
18  grep() for a patter
19  strtoi() function
20  letters vs. LETTERS variables

 Example #1:>.c13      # see the above list
 Example #2:>.c13(1)   # see the 1st explanation
";
  .zchapter13(i)
}

# Number of explanations available (.C13EXPLAIN1 ... .C13EXPLAIN<n>).
.n13chapter <- 20

# Dispatcher behind `.chapter13()`.
#
# i == 0 prints the menu function itself; a whole number in
# 1..`.n13chapter` prints the matching `.C13EXPLAIN<i>` string; anything else
# (negative, fractional, NA, too large, wrong length) prints the
# "Invalid number" message instead of failing with an object-not-found error.
.zchapter13 <- function(i) {
  # Coerce so that numeric strings such as "3" keep working; anything that
  # cannot be read as a single number becomes NA and is rejected below.
  topic <- if (length(i) == 1L) suppressWarnings(as.numeric(i)) else NA_real_
  is_whole <- !is.na(topic) && topic == trunc(topic)

  if (is_whole && topic == 0) {
    print(.c13)
  } else if (is_whole && topic >= 1 && topic <= .n13chapter) {
    # Look the text up by name instead of building and evaluating code.
    cat(get(paste0(".C13EXPLAIN", topic), mode = "character"))
  } else {
    cat("Invalid number: input value should between 1 and ",
        .n13chapter, "\n")
  }
}

# Short alias used in the menu examples.
.c13 <- .chapter13

.C13EXPLAIN1 <- "Why should we care about string manipulation?
/////////////////////////////////////////
String manipulating is very import part of data manipulation.
 1) A true data variable
 2) make our program simple
    Assume that we have SEC index from 1993 to 2019
 3) Many different input names
 4) others
/////////////////////////////////////////
"

.C13EXPLAIN2 <- "Assigning a string variable
/////////////////////////////////////////
x<-\"this is a great course\"
y<-'I love small animals'
/////////////////////////////////////////
"

.C13EXPLAIN3 <- "is.character() and typeof() functions
/////////////////////////////////////////
x<-\"this is a great course\"
y<-'I love small animals'
> is.character(x)
[1] TRUE
> typeof(x)
[1] \"character\"
/////////////////////////////////////////
"

.C13EXPLAIN4 <- "toupper(), and tolower() functions
/////////////////////////////////////////
> a<-\"Internatonal machine\"
> toupper(a)
[1] \"INTERNATONAL MACHINE\"
> tolower(a)
[1] \"internatonal machine\"
/////////////////////////////////////////
"

.C13EXPLAIN5 <- "nchar() function
/////////////////////////////////////////
> x<-\"this is a great course\"
>
> y<-'I love small animals'
> nchar(x)
[1] 22
> nchar(y)
[1] 20
/////////////////////////////////////////
"

.C13EXPLAIN6 <- "substr() function
/////////////////////////////////////////
> a<-\"Internatonal machine\"
> substr(a,1,5)
[1] \"Inter\"
> substr(a,6,10)
[1] \"naton\"
/////////////////////////////////////////
"

.C13EXPLAIN7 <- "paste() function
/////////////////////////////////////////
> paste(\"I love\",\"dogs\")
[1] \"I love dogs\"
> paste(\"I love\",\"dogs\",sep=',')
[1] \"I love,dogs\"
> paste(\"I love\",\"dogs\",sep='')
[1] \"I lovedogs\"
>
/////////////////////////////////////////
"

.C13EXPLAIN8 <- "paste0() function
/////////////////////////////////////////
The follwoin line lines are equivalent
> paste(\"I love\",\"dogs\",sep='')
[1] \"I lovedogs\"
> paste0(\"I love\",\"dogs\")
[1] \"I lovedogs\"
/////////////////////////////////////////
"

.C13EXPLAIN9 <- "as.character(), toString()
/////////////////////////////////////////
> as.character(2019)
[1] \"2019\"
> x<-1:10
> as.character(x)
 [1] \"1\"  \"2\"  \"3\"  \"4\"  \"5\"  \"6\"  \"7\"  \"8\"  \"9\"  \"10\"
> toString(2019)
[1] \"2019\"
>
/////////////////////////////////////////
"

.C13EXPLAIN10 <- "gsub() function
/////////////////////////////////////////
gsub() is for global substitution
 a) sub() just replaces once
 b) gsub() replaces all
> sub(\"a\",\"\",\"aaabbbccc199\")
[1] \"aabbbccc199\"
> gsub(\"a\",\"\",\"aaabbbccc199\")
[1] \"bbbccc199\"
/////////////////////////////////////////
"

.C13EXPLAIN11 <- "date() function
/////////////////////////////////////////
> x<-date()
> x
[1] \"Mon Sep 23 10:10:35 2019\"
> nchar(x)
[1] 24
/////////////////////////////////////////
"

.C13EXPLAIN12 <- "as.numeric() function
/////////////////////////////////////////
> as.numeric(\"2019\")+2
[1] 2021
/////////////////////////////////////////
"

.C13EXPLAIN13 <- "date() function
/////////////////////////////////////////
A true R date variable is a string.
> a<-date()
> a
[1] \"Mon Sep 23 10:45:58 2019\"
> typeof(a)
[1] \"character\"
/////////////////////////////////////////
"

.C13EXPLAIN14 <- "Repeating indicator, *, +, ? and . (dot)
/////////////////////////////////////////
 * (a star)     Repeat 0, 1, 2, ...., n times
 + (plus)       Repeat 1, 2, ..., n times
 ? (question)   Repeat 0 or 1 times
 . (a dot)      Any character repeat nce
> gsub(\"(ab)*\",\"\",\"abab2019\")
[1] \"2019\"
/////////////////////////////////////////
"

.C13EXPLAIN15 <- "logic or
/////////////////////////////////////////
We use a pair of [] for or
> gsub(\"[a,c]\",\"\",\"abc199\")
[1] \"b199\"
Using '-' from one letter to another one
> gsub(\"[a-c]\",\"\",\"abcek199\")
[1] \"ek199\"
/////////////////////////////////////////
"

.C13EXPLAIN16 <- "logic and
/////////////////////////////////////////
We use a pair of () to and
We remove ab
> gsub(\"(ab)\",\"\",\"abcek199ab\")
[1] \"cek199\"
>
> a<-\"I really like dogs\"
> gsub(\"(dogs)\",\"cats\",a)
[1] \"I really like cats\"
/////////////////////////////////////////
"

.C13EXPLAIN17 <- "logic not ^
/////////////////////////////////////////
Remvoe all letters
> gsub(\"[a-z]\",\"\",\"abc199\")
[1] \"199\"
Remove all non-letters
> gsub(\"[^a-z]\",\"\",\"abc199\")
[1] \"abc\"
>
/////////////////////////////////////////
"

.C13EXPLAIN18 <- "grep() for a patter
/////////////////////////////////////////
We can use the grep() function to test the existence of a pattern
> x<-\"International\"
> grep(\"int\",x)
integer(0)
> grep(\"Int\",x)
[1] 1
> x2<-toupper(x)
> x2
[1] \"INTERNATIONAL\"
> grep(toupper(\"int\"),x2)
[1] 1
Generate an indicator
> a<-grep(toupper(\"int\"),x2)
> length(a)
[1] 1
> b<-grep(toupper(\"good\"),x2)
> length(b)
[1] 0
/////////////////////////////////////////
"

.C13EXPLAIN19 <- "strtoi() function
/////////////////////////////////////////
The strtoi() function converts a string to an integer
> strtoi(\"2019\")+1
[1] 2020
When the input is a string variable which contains a non-integer,
the strtoi() function will not work.
> strtoi(\"2019\")
[1] 2019
> strtoi(\"2019.5\")
[1] NA
The as.integer() could be applied, shown below.
> strtoi(as.integer(\"2019.5\"))
[1] 2019
/////////////////////////////////////////
"

.C13EXPLAIN20 <- "letters vs. LETTERS
/////////////////////////////////////////
> letters
 [1] \"a\" \"b\" \"c\" \"d\" \"e\" \"f\" \"g\" \"h\" \"i\" \"j\" \"k\" \"l\" \"m\" \"n\"
[15] \"o\" \"p\" \"q\" \"r\" \"s\" \"t\" \"u\" \"v\" \"w\" \"x\" \"y\" \"z\"
> LETTERS
 [1] \"A\" \"B\" \"C\" \"D\" \"E\" \"F\" \"G\" \"H\" \"I\" \"J\" \"K\" \"L\" \"M\" \"N\"
[15] \"O\" \"P\" \"Q\" \"R\" \"S\" \"T\" \"U\" \"V\" \"W\" \"X\" \"Y\" \"Z\"
>
/////////////////////////////////////////
"