** str_c pasteの代替 str_c("aa","bb") => "aabb" ** str_detect str_detect(c("apple","banana"), "a") [1] TRUE TRUE ** str_dup str_dup(c("apple","banana"), c(1,3)) [1] "apple" "bananabananabanana" ** str_extract str_extract(c("apples x4", "flour","sugar butter", "milk x2"), "[a-z]+") [1] "apples" "flour" "sugar" "milk" ** str_extract_all str_extract_all(c("apples x4", "flour","sugar butter", "milk x2"), "[a-z]+") [[1]] [1] "apples" "x" [[2]] [1] "flour" [[3]] [1] "sugar" "butter" [[4]] [1] "milk" "x" ** str_locate(string, pattern) 初めと終わりの位置を表示 str_locate(c("bapple","baaanana"), "aa+") start end [1,] NA NA [2,] 2 4 ** str_locate_all(c("bapple","baaananaaeeaaaa"), "aa+") [[1]] start end [[2]] start end [1,] 2 4 [2,] 8 9 [3,] 12 15 ** str_match ()で抜き出しができるのが、str_extractとの違い R> strings <- c(" 219 733 8965", "329-293-8753 ", "banana", "595 794 7569", "387 287 6718", "apple", "233.398.9187 ", "482 952 3315", "239 923 8115", "842 566 4692", "Work: 579-499-7527", "$1000", "Home: 543.355.3679") R> phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})" R> str_extract(strings, phone) [1] "219 733 8965" "329-293-8753" NA "595 794 7569" "387 287 6718" NA "233.398.9187" [8] "482 952 3315" "239 923 8115" "842 566 4692" "579-499-7527" NA "543.355.3679" str_match(strings, phone) [,1] [,2] [,3] [,4] [1,] "219 733 8965" "219" "733" "8965" [2,] "329-293-8753" "329" "293" "8753" [3,] NA NA NA NA [4,] "595 794 7569" "595" "794" "7569" [5,] "387 287 6718" "387" "287" "6718" [6,] NA NA NA NA [7,] "233.398.9187" "233" "398" "9187" [8,] "482 952 3315" "482" "952" "3315" [9,] "239 923 8115" "239" "923" "8115" [10,] "842 566 4692" "842" "566" "4692" [11,] "579-499-7527" "579" "499" "7527" [12,] NA NA NA NA [13,] "543.355.3679" "543" "355" "3679" ** str_match_all(string, pattern) str_matchでいいのでは? 
matrixのリストになって返ってくる ** str_replace(string, pattern, replacement) \\1という形で (match group)を使える str_replace(fruits, "([aeiou])", "") [1] "ne apple" "tw pears" "thre bananas" R> str_replace(fruits, "([aeiou])", "\\1\\1") [1] "oone apple" "twoo pears" "threee bananas" ** str_replace_all(string, pattern, replacement) str_replace_all(fruits, "([aeiou])", "\\1\\1") [1] "oonee aapplee" "twoo peeaars" "threeee baanaanaas" ** str_split(string, pattern, n) str_split("hello and world and me", " and ") [[1]] [1] "hello" "world" "me" ** str_split_fixed(string, pattern, n) できる数を制限する。matrixが返ってくるのが嬉しい? ** str_sub(string, start=1, end=-1L) hw <- "Hadley Wickham" str_sub(hw, c(1,8), c(6,14)) #c(1,8)がstartなのに注意。 [1] "Hadley" "Wickham" str_sub(hw,-3) [1] "ham" str_sub(hw,end=-3) [1] "Hadley Wickh" str_sub(hw, seq_len(str_length(hw))) [1] "Hadley Wickham" "adley Wickham" "dley Wickham" "ley Wickham" "ey Wickham" "y Wickham" [7] " Wickham" "Wickham" "ickham" "ckham" "kham" "ham" [13] "am" "m" *** 代入により置換も可能(書き換えてしまう) str_sub(hw, 1,3) <- "abc" [1] "abcley Wickham" str_sub_replaceという形で紹介されている。 ** str_trim(string, side="both") str_trim(" fsdfd \n ") [1] "fsdfd" |