# ---------------------------------------------------------------------------
# Load the training and test CSVs, stack them into one data frame, and start
# imputing missing values (rule-based zero fill, then kNN per age bin).
# ---------------------------------------------------------------------------
library(rpart)
library(DMwR)

# NOTE(review): hard-coded, machine-specific working directory. Every relative
# path read or written later in the script resolves against it.
setwd("D:/wonga/")

data <- read.table("cs-training.csv", header = TRUE, sep = ",",
                   na.strings = "NA")
datatest <- read.table("cs-test.csv", header = TRUE, sep = ",",
                       na.strings = "NA")

# Training rows come first, test rows are appended after them.
data <- rbind(data, datatest)
data$Sf <- as.factor(data$Serious)
#data<-data.frame(matrix(unlist(data22),ncol=ncol(data22),dimnames=list(NULL,colnames(data22))))

# Snapshot of the rows that have no missing value before any imputation.
datafull <- data[complete.cases(data), ]

#
# Impute the missing values
#

# Interactive inspection: correlations among columns 3..12, and column 6 for
# the rows whose Income is missing.
cor(data[, 3:12], use = "complete.obs")
data[is.na(data$Income), 6]

# Rows with a missing Income and DR >= 2 get column 7 set to 0.
# (The original handled DR > 2 and DR == 2 in two separate statements.)
# NOTE(review): column 7 is presumably Income -- confirm against the CSV header.
data[is.na(data$Income) & data$DR >= 2, 7] <- 0

datacom <- data[complete.cases(data), ]
head(datacom)

# Classification tree for the number of dependents, fitted on complete rows.
# The original call was missing its closing parenthesis.
datacom$NumOfDepF <- factor(datacom$NumOfDep)
fit <- rpart(NumOfDepF ~ Revolving + age + NumOfTime30.59 + DR + NumRELL,
             method = "class", data = datacom)

# First kNN pass: knnImputation() with k = 3 and meth = "median" on the
# columns in `nlist`, applied separately inside each age bin [lower, upper).
nlist <- c(3, 4, 5, 6, 8, 10, 12)
age_lower <- c(-Inf, 30, 35, 40, 45, 50, 60, 70)
age_upper <- c(30, 35, 40, 45, 50, 60, 70, Inf)
for (i in seq_along(age_lower)) {
  in_bin <- data$age >= age_lower[i] & data$age < age_upper[i]
  data[in_bin, nlist] <- knnImputation(data[in_bin, nlist], k = 3,
                                       meth = "median")
}

# Interactive sanity checks. Bins do not overlap, so evaluating them after the
# loop gives the same results as evaluating them between the bins.
dim(data[data$age < 30 & is.na(data$NumOfDep), ])
dim(data[data$age > 30 & data$age < 40, ])
dim(data[data$age >= 30 & data$age < 35 & is.na(data$NumOfDep), ])

# Second kNN pass: a different column set (adds column 7, drops column 12) and
# the function's default `meth`. Only the first two age bins are handled here;
# the remaining bins follow immediately after this section and rely on `nlist`
# keeping this value.
nlist <- c(3, 4, 5, 6, 7, 8, 10)
second_lower <- c(-Inf, 30)
second_upper <- c(30, 35)
for (i in seq_along(second_lower)) {
  in_bin <- data$age >= second_lower[i] & data$age < second_upper[i]
  data[in_bin, nlist] <- knnImputation(data[in_bin, nlist], k = 3)
}
# ---------------------------------------------------------------------------
# Finish the second kNN imputation pass, standardise the predictors, and add
# the engineered features.
# ---------------------------------------------------------------------------

# Remaining age bins [lower, upper) of the second kNN pass. `nlist` is the
# column set assigned just before this section; knnImputation() is called with
# k = 3 and its default `meth`.
age_lower <- c(35, 40, 45, 50, 55, 60, 70)
age_upper <- c(40, 45, 50, 55, 60, 70, Inf)
for (i in seq_along(age_lower)) {
  in_bin <- data$age >= age_lower[i] & data$age < age_upper[i]
  data[in_bin, nlist] <- knnImputation(data[in_bin, nlist], k = 3)
}

# Any row that still contains a missing value gets Serious set to 0.
data[!complete.cases(data), "Serious"] <- 0

# Inspection only: size of the complete-case subset. The original evaluated
# the subset itself, which dumps every complete row to the console.
dim(data[complete.cases(data), ])

# NOTE(review): this vector is not referenced again in the visible script, and
# the index 19 looks like a typo (perhaps 10) -- TODO confirm or delete.
nlist <- c(3, 4, 5, 6, 7, 8, 19, 12, 13)

# Standardise every column except 1, 2, 12 and 13 (the original author marked
# this step with "???").
skip_cols <- c(1, 2, 12, 13)
data[-skip_cols] <- scale(data[-skip_cols])
write.csv(data, file = "normdata.csv")

#
# Adding new features
#

# Weighted sum of columns 5, 9 and 11 (weights 45, 90 and 75).
# NOTE(review): presumably the three past-due counters -- confirm the columns.
data$days <- data[[5]] * 45 + data[[9]] * 90 + data[[11]] * 75

# Column 7 minus column 7 times column 6.
# NOTE(review): presumably Income * (1 - DR) -- confirm the columns. Both
# inputs have already been standardised by the scale() call above.
data$netIncome <- data[[7]] - data[[7]] * data[[6]]

# Positive net income shared across NumOfDep + 1 people, otherwise 0. Assigned
# directly as a column: the original called names() on the plain vector, which
# only labels its first element.
data$Incomepp <- ifelse(data$netIncome > 0,
                        data$netIncome / (data$NumOfDep + 1),
                        0)

#
# Transform the data
#

# NOTE(review): as.matrix() returns a character matrix when any column is
# non-numeric (for example a factor column such as Sf, if it is still
# present) -- the original author flagged this line with "??".
dmatrix <- as.matrix(data)
# ---------------------------------------------------------------------------
# L2-normalise the feature matrix, train the neural networks, export
# predictions.
# ---------------------------------------------------------------------------
library(nnet)

#' Scale every column of a numeric matrix to unit Euclidean (L2) length.
#'
#' Defined before its first use so the script runs top to bottom.
#'
#' @param matr A numeric matrix.
#' @return A matrix of the same dimensions in which each column has been
#'   divided by its L2 norm. All-zero columns are returned unchanged rather
#'   than turned into NaN by a 0/0 division.
l2norm <- function(matr) {
  col_norms <- sqrt(colSums(matr^2))
  col_norms[which(col_norms == 0)] <- 1
  sweep(matr, 2, col_norms, "/")
}

# Rebuild a numeric matrix with the same shape as `dmatrix`. The row count is
# taken from the data instead of the hard-coded 150000 that the original
# comment said had to be changed by hand to dim(data)[1].
dfmatrix <- matrix(as.numeric(dmatrix), nrow = nrow(dmatrix))

# Normalise, then restore columns 1 and 2 to their values in `data` so they
# are not rescaled.
l2matrix <- l2norm(dfmatrix)
l2matrix[, 2] <- data[, 2]
l2matrix[, 1] <- data[, 1]

# Overwrites any existing normdata.csv in the working directory.
write.csv(l2matrix, file = "normdata.csv")

data2 <- data.frame(l2matrix)
feature_names <- c("id", "Serious", "Revolving", "age", "NumOfTime30.59",
                   "DR", "Income", "NumOfOCL", "NumOfTimes90", "NumRELL",
                   "NumOfTime60.89", "NumOfDep", "days",
                   "netIncome", "Incomepp")
if (ncol(data2) != length(feature_names)) {
  # A mismatch silently mislabels columns (names are assigned by position).
  warning("data2 has ", ncol(data2), " columns but ", length(feature_names),
          " names were supplied; column labels may be misaligned",
          call. = FALSE)
}
names(data2) <- feature_names

#
# Train models
#

# Rows 1..n_train are used for fitting; all remaining rows are scored.
n_train <- 150000
stopifnot(nrow(dmatrix) > n_train)
train_rows <- seq_len(n_train)
test_rows <- seq(n_train + 1, nrow(dmatrix))

# the best model is trained using
credit.model.nnet <- nnet(
  Serious ~ Revolving + age + NumOfTime30.59 + DR + Income + NumOfOCL +
    NumOfTimes90 + NumRELL + NumOfTime60.89 + NumOfDep + days + netIncome,
  data = data[train_rows, ],
  size = 10, linout = FALSE, decay = 0.025, maxit = 500
)
nnet.preds <- predict(credit.model.nnet, data[test_rows, ])
write.csv(nnet.preds, file = "pred352.csv")

# NOTE(review): this replaces `data`, but the result is not used by any later
# statement in the visible script.
data <- read.table("train.csv", header = TRUE, sep = ",", na.strings = "NA")

# Second model: fitted on the L2-normalised frame, with Incomepp added.
credit.model.nnet <- nnet(
  Serious ~ Revolving + age + NumOfTime30.59 + DR + Income + NumOfOCL +
    NumOfTimes90 + NumRELL + NumOfTime60.89 + NumOfDep + days + netIncome +
    Incomepp,
  data = data2[train_rows, ],
  size = 8, linout = FALSE, decay = 0.025, maxit = 500
)

# NOTE(review): `test` is read but not used by any later statement in the
# visible script.
test <- read.table("test.csv", header = TRUE, sep = ",", na.strings = "NA")
nnet.preds <- predict(credit.model.nnet, data2[test_rows, ])
write.csv(nnet.preds, file = "nnetpred.csv")

# Not R code -- a shell step the original left as a bare line at the end of
# the script. In Tools\liblinear-1.8\windows\ run the following command:
#   train -v 10 -c 1 -s 2 -w1 1 l2normsvmdata.txt