Importing data into data frames

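This notebook makes use of the following files, among them:

- ConstituencyResults2010.csv
- ConstituencyResults2010-nohdr.csv
- ConstituencyResults2010.tsv
- ConstituencyResults2010-fwf.txt
- ConstituencyResults2010.sav
- ConstituencyResults2010.por
- ConstituencyResults2010.dta
- ConstResults2010-stata-new.dta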

Currently, these data files are also available from https://www.pippanorris.com/data. (Previously they were available from http://www.hks.harvard.edu/fs/pnorris/Data/Data.htm.)

Importing data from text files

Importing CSV data:

# Peek at the first five raw lines of the CSV file with 'readLines()'
# to see the header row and the comma-separated layout before importing.
readLines(con = "ConstituencyResults2010.csv", n = 5L)
[1] "refno,cons,lab,libdem,snp,plcym,green,bnp,ukip"
[2] "1,14.3,51.9,16.3,,7.1,,4.1,1.6"                
[3] "2,35.8,24.5,19.3,,17.8,,,2.1"                  
[4] "3,12.4,44.4,18.6,22.2,,,1.7,"                  
[5] "4,20.7,36.5,28.4,11.9,,1.0,1.2,"               
# The import itself is done with 'read.csv()', which takes the first line
# as the variable names; afterwards the first five rows are shown.
ConstRes2010 <- read.csv(file = "ConstituencyResults2010.csv")
ConstRes2010[seq_len(5), ]
  refno cons lab  libdem snp  plcym green bnp ukip
1 1     14.3 51.9 16.3     NA  7.1  NA    4.1 1.6 
2 2     35.8 24.5 19.3     NA 17.8  NA     NA 2.1 
3 3     12.4 44.4 18.6   22.2   NA  NA    1.7  NA 
4 4     20.7 36.5 28.4   11.9   NA   1    1.2  NA 
5 5     30.3 13.6 38.4   15.7   NA  NA    1.1 0.9 
# The same data stored as a CSV file that lacks a header line with
# variable names: show its first five raw lines.
readLines(con = "ConstituencyResults2010-nohdr.csv", n = 5L)
[1] "1,14.3,51.9,16.3,,7.1,,4.1,1.6"  "2,35.8,24.5,19.3,,17.8,,,2.1"   
[3] "3,12.4,44.4,18.6,22.2,,,1.7,"    "4,20.7,36.5,28.4,11.9,,1.0,1.2,"
[5] "5,30.3,13.6,38.4,15.7,,,1.1,0.9"
# With 'header = FALSE' the first line is read as data, so the columns
# receive the default names V1, V2, ...
ConstRes2010 <- read.csv(
  file = "ConstituencyResults2010-nohdr.csv",
  header = FALSE
)
ConstRes2010[seq_len(5), ]
  V1 V2   V3   V4   V5   V6   V7 V8  V9 
1 1  14.3 51.9 16.3   NA  7.1 NA 4.1 1.6
2 2  35.8 24.5 19.3   NA 17.8 NA  NA 2.1
3 3  12.4 44.4 18.6 22.2   NA NA 1.7  NA
4 4  20.7 36.5 28.4 11.9   NA  1 1.2  NA
5 5  30.3 13.6 38.4 15.7   NA NA 1.1 0.9
# Importing tab-delimited data: first look at the raw lines, where the
# field separator shows up as "\t".
readLines(con = "ConstituencyResults2010.tsv", n = 5L)
[1] "refno\tcons\tlab\tlibdem\tsnp\tplcym\tgreen\tbnp\tukip"
[2] "1\t14.3\t51.9\t16.3\t\t7.1\t\t4.1\t1.6"                
[3] "2\t35.8\t24.5\t19.3\t\t17.8\t\t\t2.1"                  
[4] "3\t12.4\t44.4\t18.6\t22.2\t\t\t1.7\t"                  
[5] "4\t20.7\t36.5\t28.4\t11.9\t\t1.0\t1.2\t"               
# 'read.delim()' imports the tab-delimited file; then show the first five rows.
ConstRes2010 <- read.delim(file = "ConstituencyResults2010.tsv")
ConstRes2010[seq_len(5), ]
  refno cons lab  libdem snp  plcym green bnp ukip
1 1     14.3 51.9 16.3     NA  7.1  NA    4.1 1.6 
2 2     35.8 24.5 19.3     NA 17.8  NA     NA 2.1 
3 3     12.4 44.4 18.6   22.2   NA  NA    1.7  NA 
4 4     20.7 36.5 28.4   11.9   NA   1    1.2  NA 
5 5     30.3 13.6 38.4   15.7   NA  NA    1.1 0.9 

Importing fixed-width data:

# First five raw lines of the fixed-width file: no separators, every field
# occupies a fixed number of characters.
readLines(con = "ConstituencyResults2010-fwf.txt", n = 5L)
[1] "  114.351.916.3     7.1     4.1 1.6" "  235.824.519.3    17.8         2.1"
[3] "  312.444.418.622.2         1.7"     "  420.736.528.411.9     1.0 1.2"    
[5] "  530.313.638.415.7         1.1 0.9"
# 'read.fwf()' needs the width of every field: the first field is three
# characters wide, the remaining eight fields are four characters wide each.
ConstRes2010 <- read.fwf(
  file = "ConstituencyResults2010-fwf.txt",
  widths = c(3, rep(4, 8))
)
ConstRes2010[seq_len(5), ]
  V1 V2   V3   V4   V5   V6   V7 V8  V9 
1 1  14.3 51.9 16.3   NA  7.1 NA 4.1 1.6
2 2  35.8 24.5 19.3   NA 17.8 NA  NA 2.1
3 3  12.4 44.4 18.6 22.2   NA NA 1.7  NA
4 4  20.7 36.5 28.4 11.9   NA  1 1.2  NA
5 5  30.3 13.6 38.4 15.7   NA NA 1.1 0.9

Importing data from other statistics packages

Importing data using the foreign package

library(foreign)

# An SPSS 'system' file (.sav); 'to.data.frame = TRUE' requests a data frame
# as the return value.
ConstRes2010 <- read.spss(
  file = "ConstituencyResults2010.sav",
  to.data.frame = TRUE
)
ConstRes2010[seq_len(5), ]
  refno cons lab  libdem snp  plcym green bnp ukip
1 1     14.3 51.9 16.3     NA  7.1  NA    4.1 1.6 
2 2     35.8 24.5 19.3     NA 17.8  NA     NA 2.1 
3 3     12.4 44.4 18.6   22.2   NA  NA    1.7  NA 
4 4     20.7 36.5 28.4   11.9   NA   1    1.2  NA 
5 5     30.3 13.6 38.4   15.7   NA  NA    1.1 0.9 
# An SPSS 'portable' file (.por), imported with the same function as the
# 'system' file.
ConstRes2010 <- read.spss(
  file = "ConstituencyResults2010.por",
  to.data.frame = TRUE
)
ConstRes2010[seq_len(5), ]
  REFNO CONS LAB  LIBDEM SNP  PLCYM GREEN BNP UKIP
1 1     14.3 51.9 16.3     NA  7.1  NA    4.1 1.6 
2 2     35.8 24.5 19.3     NA 17.8  NA     NA 2.1 
3 3     12.4 44.4 18.6   22.2   NA  NA    1.7  NA 
4 4     20.7 36.5 28.4   11.9   NA   1    1.2  NA 
5 5     30.3 13.6 38.4   15.7   NA  NA    1.1 0.9 
# A Stata file (.dta), imported with 'read.dta()' from the 'foreign' package.
ConstRes2010 <- read.dta(file = "ConstituencyResults2010.dta")
ConstRes2010[seq_len(5), ]
  refno cons lab  libdem snp  plcym green bnp ukip
1 1     14.3 51.9 16.3     NA  7.1  NA    4.1 1.6 
2 2     35.8 24.5 19.3     NA 17.8  NA     NA 2.1 
3 3     12.4 44.4 18.6   22.2   NA  NA    1.7  NA 
4 4     20.7 36.5 28.4   11.9   NA   1    1.2  NA 
5 5     30.3 13.6 38.4   15.7   NA  NA    1.1 0.9 
# The following call fails on purpose: 'read.dta()' from the 'foreign' package
# rejects this file because it is not in a Stata version 5-12 format, i.e. the
# newer Stata format is not supported (see the error message printed below).
ConstRes2010 <- read.dta("ConstResults2010-stata-new.dta")
Error in read.dta("ConstResults2010-stata-new.dta"): not a Stata version 5-12 .dta file
Traceback:

1. read.dta("ConstResults2010-stata-new.dta")