R language: batch-read data files and extract the digits from strings

# Part 1: batch-read every CSV file in the current folder

# Collect the names of all files in the working directory that end in ".csv".
# The dot is escaped and the pattern is anchored with `$`, so a name that
# merely contains "csv" somewhere (e.g. "mycsv.txt") is not picked up.
filelist <- list.files(pattern = "\\.csv$")

# Number of files found
m <- length(filelist)

# Read each file named in `filelist`. The result is a list with one data frame
# per file, in the same order as `filelist`. Files are read without a header
# row, and text columns stay character vectors instead of becoming factors.
datalist <- lapply(
  filelist,
  function(x) read.csv(x, header = FALSE, stringsAsFactors = FALSE)
)


# Part 2: extract the number embedded in each string

library(stringr) # install first if missing: install.packages("stringr")

# Six sample strings; most contain digits, and the goal is to pull the number
# out of each one.
cha1 <- c("a1", "b23", "c4", "d56", "e", "f4")

# str_extract_all() returns a list with one element per input string. Each
# element holds the matched digits as separate one-character strings, so
# "b23" yields c("2", "3") rather than "23", and "e" yields character(0).
col1 <- str_extract_all(cha1, "\\d")

# Drop the elements that belong to strings with no digit at all (here "e").
col1 <- col1[lengths(col1) > 0]

# Glue the digits of each element back together ("2", "3" -> "23") and
# convert the joined strings to numbers. vapply() also copes with an empty
# `col1`, where a `1:length(col1)` loop would iterate over c(1, 0).
col11 <- as.numeric(vapply(col1, paste0, character(1), collapse = ""))

# With the letters removed, some numbers may now repeat (4 appears twice
# here); keep only the first occurrence of each. Skip this step if
# duplicates should be kept.
col11 <- col11[!duplicated(col11)]

# Two supplementary tips:
# 1. Deleting a particular character from a string
# gsub(a, b, c): replaces every match of pattern a in string c with b, for example:

# Replace every space with the empty string, i.e. delete the spaces.
# The pattern must be " " (one space); an empty pattern "" changes nothing.
gsub(" ", "", "Lin hai") # returns "Linhai"

# 2. Reading only selected rows and columns from a data file
library(data.table)

# fread() arguments used below:
#   skip       - number of lines to skip at the top of the file
#   nrows      - maximum number of rows to read
#   select     - columns to keep, by position or by name; for example
#                select = c("X1", "X2") keeps only the columns X1 and X2
#   data.table - FALSE returns a plain data.frame instead of a data.table
#   header     - FALSE treats the first line read as data, not column names
# So this call skips the first line, then reads at most 100 rows of the
# first 50 columns.
# NOTE(review): the file name was "Data .csv " in the original; the embedded
# spaces look like a transcription artifact. Confirm the real file name.
DATAl <- fread(
  "Data.csv",
  skip = 1, nrows = 100, select = 1:50,
  data.table = FALSE, header = FALSE
)


Published 49 original articles · won praise 95 · Views 230,000 +

Guess you like

Origin blog.csdn.net/Trisyp/article/details/52276790