R関連‎ > ‎Packages‎ > ‎

stringr

** str_c pasteの代替
   str_c("aa","bb") => "aabb"

** str_detect
   str_detect(string, pattern) パターンにマッチするかどうかを論理値で返す
   str_detect(c("apple","banana"), "a")
   [1] TRUE TRUE

** str_dup
   str_dup(c("apple","banana"), c(1,3))
   [1] "apple"              "bananabananabanana"

** str_extract
   str_extract(c("apples x4", "flour","sugar butter", "milk x2"), "[a-z]+")
   [1] "apples" "flour"  "sugar"  "milk"

** str_extract_all
   str_extract_all(c("apples x4", "flour","sugar butter", "milk x2"), "[a-z]+")
   [[1]]
   [1] "apples" "x"
   [[2]]
   [1] "flour"
   [[3]]
   [1] "sugar"  "butter"
   [[4]]
   [1] "milk" "x"
** str_locate(string, pattern) 最初にマッチした箇所の開始位置(start)と終了位置(end)を返す
   str_locate(c("bapple","baaanana"), "aa+")
   start end
   [1,]    NA  NA
   [2,]     2   4
** str_locate_all(c("bapple","baaananaaeeaaaa"), "aa+")
   [[1]]
   start end
   [[2]]
   start end
   [1,]     2   4
   [2,]     8   9
   [3,]    12  15

** str_match パターン中の () で囲んだグループごとに抜き出せるのが、str_extractとの違い
   R> strings <- c(" 219 733 8965", "329-293-8753 ", "banana", "595 794 7569", "387 287 6718", "apple", "233.398.9187 ", "482 952 3315",
   "239 923 8115", "842 566 4692", "Work: 579-499-7527", "$1000", "Home: 543.355.3679")
   R> phone <- "([2-9][0-9]{2})[- .]([0-9]{3})[- .]([0-9]{4})"
   R> str_extract(strings, phone)
   [1] "219 733 8965" "329-293-8753" NA             "595 794 7569" "387 287 6718" NA             "233.398.9187"
   [8] "482 952 3315" "239 923 8115" "842 566 4692" "579-499-7527" NA             "543.355.3679"
   str_match(strings, phone)
   [,1]           [,2]  [,3]  [,4]
   [1,] "219 733 8965" "219" "733" "8965"
   [2,] "329-293-8753" "329" "293" "8753"
   [3,] NA             NA    NA    NA
   [4,] "595 794 7569" "595" "794" "7569"
   [5,] "387 287 6718" "387" "287" "6718"
   [6,] NA             NA    NA    NA
   [7,] "233.398.9187" "233" "398" "9187"
   [8,] "482 952 3315" "482" "952" "3315"
   [9,] "239 923 8115" "239" "923" "8115"
   [10,] "842 566 4692" "842" "566" "4692"
   [11,] "579-499-7527" "579" "499" "7527"
   [12,] NA             NA    NA    NA
   [13,] "543.355.3679" "543" "355" "3679"


** str_match_all(string, pattern)
   str_matchでいいのでは? こちらは結果がmatrixのリストになって返ってくる


** str_replace(string, pattern, replacement)
   \\1という形で (match group)を使える


   fruits <- c("one apple", "two pears", "three bananas")
   str_replace(fruits, "([aeiou])", "")
   [1] "ne apple"     "tw pears"     "thre bananas"
   R> str_replace(fruits, "([aeiou])", "\\1\\1")
   [1] "oone apple"     "twoo pears"     "threee bananas"


** str_replace_all(string, pattern, replacement)
   str_replace_all(fruits, "([aeiou])", "\\1\\1")
   [1] "oonee aapplee"      "twoo peeaars"       "threeee baanaanaas"


** str_split(string, pattern, n)
   str_split("hello and world and me", " and ")
   [[1]]
   [1] "hello" "world" "me"


** str_split_fixed(string, pattern, n)
   分割してできる要素の数を n 個に固定する。matrixが返ってくるのが嬉しい?

** str_sub(string, start=1, end=-1L)
   hw <- "Hadley Wickham"
   str_sub(hw, c(1,8), c(6,14)) #c(1,8)がstartなのに注意。
   [1] "Hadley"  "Wickham"

   str_sub(hw,-3)
   [1] "ham"
   str_sub(hw,end=-3)
   [1] "Hadley Wickh"

   str_sub(hw, seq_len(str_length(hw)))
   [1] "Hadley Wickham" "adley Wickham"  "dley Wickham"   "ley Wickham"    "ey Wickham"     "y Wickham"
   [7] " Wickham"       "Wickham"        "ickham"         "ckham"          "kham"           "ham"
   [13] "am"             "m"

*** 代入により置換も可能(書き換えてしまう)

    str_sub(hw, 1,3) <- "abc"
    hw
    [1] "abcley Wickham"
    str_sub_replaceという形で紹介されている。

** str_trim(string, side="both")
   str_trim("   fsdfd  \n  ")
   [1] "fsdfd"




Comments