## Read the comma-separated file, taking column names from its first row.
## No special missing-value codes are declared here, so any non-numeric
## entry in a column causes that whole column to be read as character.
dat <- read.table(file.path('..', 'data', 'RTADataSub.csv'),
                  sep = ',', header = TRUE)
sapply(dat, class)[1:10] # What are the classes of the columns?
X2010.08.02.18.55 X2336 X549 X2086
"character" "character" "character" "character"
X666 X481 X298 X1624
"character" "character" "character" "character"
X1732 X593
"character" "character"
## whoops, the columns were read as character rather than numeric; listing
## the distinct values of column 2 shows an 'x' (presumably indicating
## missingness) as well as empty strings:
unique(dat[ , 2])
[1] "2124" "1830" "1833" "1600" "1578" "1187" "1005" "918" "865" "871"
[11] "860" "883" "897" "898" "893" "913" "870" "962" "880" "875"
[21] "884" "894" "836" "848" "885" "851" "900" "861" "866" "867"
[31] "829" "853" "920" "877" "908" "855" "845" "859" "856" "825"
[41] "828" "854" "847" "840" "873" "822" "818" "838" "815" "813"
[51] "816" "849" "802" "805" "792" "823" "808" "798" "800" "842"
[61] "809" "807" "826" "810" "801" "794" "771" "796" "790" "787"
[71] "775" "751" "783" "811" "768" "779" "795" "770" "821" "830"
[81] "767" "772" "791" "781" "773" "777" "814" "778" "782" "837"
[91] "759" "846" "797" "835" "832" "793" "803" "834" "785" "831"
[101] "820" "812" "824" "728" "760" "762" "753" "758" "764" "741"
[111] "709" "735" "749" "752" "761" "750" "776" "766" "789" "763"
[121] "864" "858" "869" "886" "844" "863" "916" "890" "872" "907"
[131] "926" "935" "933" "906" "905" "912" "972" "996" "1009" "961"
[141] "952" "981" "917" "1011" "1071" "1920" "3245" "3805" "3926" "3284"
[151] "2700" "2347" "2078" "2935" "3040" "1860" "1437" "1512" "1720" "1493"
[161] "1026" "928" "874" "833" "850" "" "x"
## let's treat 'x' as a missing value indicator:
## re-read the same file, declaring both "NA" and "x" as missing-value
## strings so that the affected columns can be parsed as numbers
dat2 <- read.table(file.path('..', 'data', 'RTADataSub.csv'),
                   sep = ',', header = TRUE,
                   na.strings = c("NA", "x"))
unique(dat2[ , 2]) # distinct values of column 2 after the re-read
[1] 2124 1830 1833 1600 1578 1187 1005 918 865 871 860 883 897 898 893
[16] 913 870 962 880 875 884 894 836 848 885 851 900 861 866 867
[31] 829 853 920 877 908 855 845 859 856 825 828 854 847 840 873
[46] 822 818 838 815 813 816 849 802 805 792 823 808 798 800 842
[61] 809 807 826 810 801 794 771 796 790 787 775 751 783 811 768
[76] 779 795 770 821 830 767 772 791 781 773 777 814 778 782 837
[91] 759 846 797 835 832 793 803 834 785 831 820 812 824 728 760
[106] 762 753 758 764 741 709 735 749 752 761 750 776 766 789 763
[121] 864 858 869 886 844 863 916 890 872 907 926 935 933 906 905
[136] 912 972 996 1009 961 952 981 917 1011 1071 1920 3245 3805 3926 3284
[151] 2700 2347 2078 2935 3040 1860 1437 1512 1720 1493 1026 928 874 833 850
[166] NA
## Let's check that the empty strings from 'dat' are now NAs in 'dat2'.
## First, list the first 10 row indices of 'dat' whose second column is
## an empty string.
which(dat[ , 2] == "")[1:10]
[1] 312 313 314 315 317 318 319 320 322 323
## Index 'dat2' by the first row of 'dat' whose second column is an empty
## string, to see how that row was read the second time.
dat2[which(dat[, 2] == "")[1], ] # pull out a line with a missing string
X2010.08.02.18.55 X2336 X549 X2086 X666 X481 X298 X1624 X1732 X593 X222
312 2010-08-03 10:31 NA NA NA NA NA NA NA NA NA NA
X911 X261 X1730 X211 X365 X216 X438 X596 X206 X204 X270 X176 X1159 X1137
312 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
X135 X2036 X138 X1038 X201 X610 X627 X195 X976 X151 X1830 X421 X1087 X1157
312 NA NA NA NA NA NA NA NA NA NA NA NA NA NA
X181 X267 X193 X391 X208 X614 X546 X186 X1391 X217 X230 X625 X376 X164 X329
312 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
X1043 X497 X440 X197 X287 X837 X226 X973
312 NA NA NA NA NA NA NA NA