Tuesday, February 28, 2017

Kaggle Bosch test

import numpy as np
import pandas as pd
import xgboost as xgb
import time
import random
from sklearn.model_selection import StratifiedKFold

# Load a reproducible random subset of the Bosch numeric training data and
# build the cross-validation splitter used by the fold loops further down.
random_seed = 9   # seed for the row sampler, so the same subset is drawn each run
subset = 0.4      # fraction of the rows to keep

n_rows = 1183747  # presumably the number of data rows in the CSV (header excluded) -- verify against the file
train_rows = int(n_rows * subset)

# Choose which file lines to skip so that exactly train_rows rows are read.
# Sampling starts at 1, so line 0 (the header) is never skipped.
# range (rather than the Python-2-only xrange) works on both Python 2 and 3.
random.seed(random_seed)
skip = sorted(random.sample(range(1, n_rows + 1), n_rows - train_rows))
data = pd.read_csv("/home/zfu/Kaggle/bosch/train_numeric.csv", index_col=0, dtype=np.float32, skiprows=skip)

# Split the label column off; every remaining column is used as a feature.
y = data.pop('Response').values
X = data.values

# The fold loops call skf.split(X, y), but skf was never defined in these
# notes. The recorded runs show five contiguous (unshuffled) test folds,
# hence n_splits=5 with shuffling left at its default.
skf = StratifiedKFold(n_splits=5)

# GPU

# Booster settings for the GPU run, written as a single literal.
param = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'max_depth': 5,
    'eta': 0.3,
    'silent': 0,
    # GPU tree updater; swap in 'grow_colmaker' to run the same setup on CPU.
    'updater': 'grow_gpu',
}

# Number of boosting rounds passed to xgb.train for each fold.
num_round = 20

In [18]: i = 1
    ...: 
    ...: for train_index, test_index in skf.split(X, y):
    ...: ^Iprint("TRAIN:", train_index, "TEST:", test_index)
    ...: ^IX_train, X_test = X[train_index], X[test_index]
    ...: ^Iy_train, y_test = y[train_index], y[test_index]
    ...: ^Idtrain = xgb.DMatrix(X_train, label=y_train)
    ...: ^Itmp = time.time()
    ...: ^Ibst = xgb.train(param, dtrain, num_round)
    ...: ^Iboost_time = time.time() - tmp
    ...: ^Ires = bst.eval(xgb.DMatrix(X_test, label=y_test))
    ...: ^Iprint("Fold {}: {}, Boost Time {}".format(i, res, str(boost_time)))
    ...: ^Idel bst
    ...: ^Ii = i + 1
    ...:  
('TRAIN:', array([ 94682,  94683,  94684, ..., 473495, 473496, 473497]), 'TEST:', array([    0,     1,     2, ..., 97097, 97390, 97619]))
[00:48:52] Allocated 2651/8022 MB on GeForce GTX 1070
Fold 1: [0] eval-auc:0.687818, Boost Time 13.2141840458
('TRAIN:', array([     0,      1,      2, ..., 473495, 473496, 473497]), 'TEST:', array([ 94682,  94683,  94684, ..., 191560, 191830, 191922]))
[00:49:07] Allocated 2649/8006 MB on GeForce GTX 1070
Fold 2: [0] eval-auc:0.699430, Boost Time 9.08604192734
('TRAIN:', array([     0,      1,      2, ..., 473495, 473496, 473497]), 'TEST:', array([189390, 189391, 189392, ..., 286293, 286337, 286657]))
[00:49:21] Allocated 2650/8006 MB on GeForce GTX 1070
Fold 3: [0] eval-auc:0.679230, Boost Time 9.11538410187
('TRAIN:', array([     0,      1,      2, ..., 473495, 473496, 473497]), 'TEST:', array([284088, 284089, 284090, ..., 379736, 379934, 380831]))
[00:49:35] Allocated 2648/8006 MB on GeForce GTX 1070
Fold 4: [0] eval-auc:0.682657, Boost Time 9.08212804794
('TRAIN:', array([     0,      1,      2, ..., 379736, 379934, 380831]), 'TEST:', array([378786, 378787, 378788, ..., 473495, 473496, 473497]))
[00:49:49] Allocated 2650/8006 MB on GeForce GTX 1070

Fold 5: [0] eval-auc:0.698049, Boost Time 9.16434597969


# CPU

# Booster settings for the CPU run, written as a single literal.
param = {
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'max_depth': 5,
    'eta': 0.3,
    'silent': 0,
    # CPU tree updater; swap in 'grow_gpu' to run the same setup on GPU.
    'updater': 'grow_colmaker',
}

# Number of boosting rounds passed to xgb.train for each fold.
num_round = 20

In [20]: i = 1
    ...: 
    ...: for train_index, test_index in skf.split(X, y):
    ...: ^Iprint("TRAIN:", train_index, "TEST:", test_index)
    ...: ^IX_train, X_test = X[train_index], X[test_index]
    ...: ^Iy_train, y_test = y[train_index], y[test_index]
    ...: ^Idtrain = xgb.DMatrix(X_train, label=y_train)
    ...: ^Itmp = time.time()
    ...: ^Ibst = xgb.train(param, dtrain, num_round)
    ...: ^Iboost_time = time.time() - tmp
    ...: ^Ires = bst.eval(xgb.DMatrix(X_test, label=y_test))
    ...: ^Iprint("Fold {}: {}, Boost Time {}".format(i, res, str(boost_time)))
    ...: ^Idel bst
    ...: ^Ii = i + 1
    ...:  
('TRAIN:', array([ 94682,  94683,  94684, ..., 473495, 473496, 473497]), 'TEST:', array([    0,     1,     2, ..., 97097, 97390, 97619]))
Fold 1: [0] eval-auc:0.691973, Boost Time 30.7943990231
('TRAIN:', array([     0,      1,      2, ..., 473495, 473496, 473497]), 'TEST:', array([ 94682,  94683,  94684, ..., 191560, 191830, 191922]))
Fold 2: [0] eval-auc:0.702624, Boost Time 28.8307631016
('TRAIN:', array([     0,      1,      2, ..., 473495, 473496, 473497]), 'TEST:', array([189390, 189391, 189392, ..., 286293, 286337, 286657]))
Fold 3: [0] eval-auc:0.669979, Boost Time 28.8397688866
('TRAIN:', array([     0,      1,      2, ..., 473495, 473496, 473497]), 'TEST:', array([284088, 284089, 284090, ..., 379736, 379934, 380831]))
Fold 4: [0] eval-auc:0.674863, Boost Time 28.8023629189
('TRAIN:', array([     0,      1,      2, ..., 379736, 379934, 380831]), 'TEST:', array([378786, 378787, 378788, ..., 473495, 473496, 473497]))

Fold 5: [0] eval-auc:0.696640, Boost Time 30.3664522171



Saturday, February 25, 2017

Look up this host's public IP address (via OpenDNS)

[zfu@gpu-0-0 ~]$ dig +short myip.opendns.com @resolver1.opendns.com

8.37.117.238


Tuesday, February 7, 2017

FastQC commands

/mnt/apps/FastQC/FastQC-0.11.2/fastqc --contaminants /mnt/BioAdHoc/Users/zfu/packages/FastQC/Configuration/contaminants_LJI.txt -o /mnt/overflow/NGSAnalyses/QC/QC_Validation/160804_D00361_0380_AHK3VCBCXX_8_4_16_GrSe60B_AaMi_150x150/28_C1_038_C50_N705S502 -t 2 /mnt/overflow/BioAdHoc/Users/zfu/2017_Trimming/160804_D00361_0380_AHK3VCBCXX_8_4_16_GrSe60B_AaMi_150x150/28_C1_038_C50_N705S502/BeforeTrimming.28_C1_038_C50_N705S502_R1.fastq.gz

/mnt/apps/FastQC/FastQC-0.11.2/fastqc --contaminants /mnt/BioAdHoc/Users/zfu/packages/FastQC/Configuration/contaminants_LJI.txt --casava -o /mnt/overflow/BioAdHoc/Users/zfu/2017_Trimming/160804_D00361_0380_AHK3VCBCXX_8_4_16_GrSe60B_AaMi_150x150/output/28_C1_038_C50_N705S502 -t 2 /mnt/overflow/BioAdHoc/Users/zfu/2017_Trimming/160804_D00361_0380_AHK3VCBCXX_8_4_16_GrSe60B_AaMi_150x150/28_C1_038_C50_N705S502/28_C1_038_C50_N705S502_GGACTCCT-CTCTCTAT_L002_R2_001.fastq.gz

/mnt/apps/FastQC/FastQC-0.11.2/fastqc --contaminants /mnt/BioAdHoc/Users/zfu/packages/FastQC/Configuration/contaminants_LJI.txt -o /mnt/overflow/NGSAnalyses/QC/QC_Validation/160804_D00361_0380_AHK3VCBCXX_8_4_16_GrSe60B_AaMi_150x150/28_C1_038_C50_N705S502 -t 2 /mnt/overflow/BioAdHoc/Users/zfu/2017_Trimming/160804_D00361_0380_AHK3VCBCXX_8_4_16_GrSe60B_AaMi_150x150/28_C1_038_C50_N705S502/BeforeTrimming.28_C1_038_C50_N705S502_R2.fastq.gz


/mnt/apps/FastQC/FastQC-0.11.2/fastqc --contaminants /mnt/BioAdHoc/Users/zfu/packages/FastQC/Configuration/contaminants_LJI.txt -o /mnt/overflow/NGSAnalyses/QC/QC_Validation/160804_D00361_0380_AHK3VCBCXX_8_4_16_GrSe60B_AaMi_150x150/28_C1_038_C50_N705S502 -t 2 /mnt/overflow/BioAdHoc/Users/zfu/2017_Trimming/160804_D00361_0380_AHK3VCBCXX_8_4_16_GrSe60B_AaMi_150x150/trimmed/28_C1_038_C50_N705S502/AfterTrimming.28_C1_038_C50_N705S502_R1.fastq.gz

/mnt/apps/FastQC/FastQC-0.11.2/fastqc --contaminants /mnt/BioAdHoc/Users/zfu/packages/FastQC/Configuration/contaminants_LJI.txt -o /mnt/overflow/NGSAnalyses/QC/QC_Validation/160804_D00361_0380_AHK3VCBCXX_8_4_16_GrSe60B_AaMi_150x150/28_C1_038_C50_N705S502 -t 2 /mnt/overflow/BioAdHoc/Users/zfu/2017_Trimming/160804_D00361_0380_AHK3VCBCXX_8_4_16_GrSe60B_AaMi_150x150/trimmed/28_C1_038_C50_N705S502/AfterTrimming.28_C1_038_C50_N705S502_R2.fastq.gz