2014-03-28
Workflow for retraining the classifier:
1. Generate random test ids
2. Generate features
3. Generate SVM models and predictions.
Friday, March 28, 2014
Tuesday, March 25, 2014
Thursday, March 20, 2014
2014-03-20
Merge all CSV files found under a directory tree into a single file:
Merge all CSV files found under a directory tree into a single file:
find /path/to/directory/ -name *.csv -print0 | xargs -0 -I file cat file > merged.file
samtools view -bS DQB1_030201.sam | samtools sort - DQB1_030201_sorted
samtools index DQB1_030201_sorted.bam DQB1_030201_sorted.bai
samtools view -bS DQB1_030501.sam | samtools sort - DQB1_030501_sorted
samtools index DQB1_030501_sorted.bam DQB1_030501_sorted.bai
samtools view -bS DQB1_0331.sam | samtools sort - DQB1_0331_sorted
samtools index DQB1_0331_sorted.bam DQB1_0331_sorted.bai
samtools view -bS DQB1_040101.sam | samtools sort - DQB1_040101_sorted
samtools index DQB1_040101_sorted.bam DQB1_040101_sorted.bai
samtools view -bS DQB1_040201.sam | samtools sort - DQB1_040201_sorted
samtools index DQB1_040201_sorted.bam DQB1_040201_sorted.bai
Wednesday, March 19, 2014
2014-03-19
INSERT INTO pubmed_temp SELECT * FROM pubmed_information_backup20140306;
mysql> INSERT INTO pubmed_temp SELECT * FROM pubmed_information_backup20140306;
Query OK, 360246 rows affected (47.85 sec)
Records: 360246 Duplicates: 0 Warnings: 0
mysql> ALTER IGNORE TABLE pubmed_temp ADD UNIQUE INDEX PUBMED_ID_INDEX (Pubmed_ID);
Query OK, 360246 rows affected (21.50 sec)
Records: 360246 Duplicates: 180123 Warnings: 0
SELECT * FROM pubmed_temp ORDER BY Num DESC LIMIT 10;
RENAME TABLE pubmed_temp TO pubmed_information;
mysql> select count(1) from t4_tokenized_pubmed_information_new;
+----------+
| count(1) |
+----------+
| 54472 |
+----------+
1 row in set (0.02 sec)
INSERT INTO pubmed_temp SELECT * FROM pubmed_information_backup20140306;
mysql> INSERT INTO pubmed_temp SELECT * FROM pubmed_information_backup20140306;
Query OK, 360246 rows affected (47.85 sec)
Records: 360246 Duplicates: 0 Warnings: 0
mysql> ALTER IGNORE TABLE pubmed_temp ADD UNIQUE INDEX PUBMED_ID_INDEX (Pubmed_ID);
Query OK, 360246 rows affected (21.50 sec)
Records: 360246 Duplicates: 180123 Warnings: 0
SELECT * FROM pubmed_temp ORDER BY Num DESC LIMIT 10;
RENAME TABLE pubmed_temp TO pubmed_information;
mysql> select count(1) from t4_tokenized_pubmed_information_new;
+----------+
| count(1) |
+----------+
| 54472 |
+----------+
1 row in set (0.02 sec)
Thursday, March 13, 2014
Wednesday, March 12, 2014
2014-03-11
x <- scale(x, center = FALSE)
hmap(x, labRow = FALSE, method = "OLO")
hmap(x, labRow = FALSE, method = "OLO", col=diverge_hcl(100), range=c(-3.5,3.5), colorkey=TRUE)
hmap(x, labRow = FALSE, method = "OLO", col = c("yellow", "blue"))
x <- read.csv(file="/Bioinformatics/Users/zfu/2014_BP_TB_Paper/rank_log2.csv", head=TRUE,sep=",")
x
attributes(x)
x <- as.matrix(x)
x
attributes(x)
x[1:10,]
?read.csv
x <- read.table(file="/Bioinformatics/Users/zfu/2014_BP_TB_Paper/rank_log2.csv", head=TRUE, sep=",", row.names=1)
x
x <- data.matrix(x)
x
attributes(x)
x[1:10,]
library(seritation)
library(seriation)
o <- c(seriate(dist(x), method ="OLO"),seriate(dist(t(x)), method = "OLO"))
o
history
history(100)
o1 <- seriate(dist(x), method = "OLO")
o2 <- seriate(dist(t(x)), method = "OLO")
o1
desribe(o1)
attributes(o1)
attributes(o2)
o1[1]
o1[[1]]
o1[[1]][1]
o1[[1]][[1]]
attributes(o1[[1]][[1]])
head(get_order(o1))
order1 <- get_order(o1)
order2 <- get_order(o2)
order2
x
attributes(x)
clustered_data <- x[order1,order2]
clustered_data
clustered_data[1:2,]
ls()
history(100)
> pdf("aa.pdf")
> heatmap.2(clustered_data, col=my_palette, scale="none", Colv = NULL, dendrogram = "row", key=T, keysize = 1.5, density.info="none", trace="none",cexCol=0.9, labRow=NA)
> dev.off()
null device
1
> heatmap.2(clustered_data, col=my_palette, scale="none", dendrogram = "column", key=T, keysize=1.5, density.info="none", trace="none",cexCol=0.5, labRow=NA)
cc = c(rep("blue",10),rep("brown",11),rep("cyan",11),rep("orange",4),rep("red",15))
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "row", density.info="none", trace="none", cexCol=0.3, labRow=NA, labCol=NA, margin=c(12, 12), key=T, keysize=1)
aa_disease
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/figure3_diseaseType.pdf")
heatmap.2(clustered_data, col=my_palette, scale="none", dendrogram = "column", key=T, keysize=1, density.info="none", trace="none",cexCol=0.3, labRow=NA, ColSideColors=aa_disease, margin=c(12, 12), labCol=NA)
dev.off()
aa_disease
colnames(clustered_data)
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/test.pdf")
heatmap.2(clustered_data, col=my_palette, scale="none", dendrogram = "column", key=T, keysize=1, density.info="none", trace="none",cexCol=0.3, labRow=NA, ColSideColors=aa_disease, margin=c(12, 12))
dev.off()
x1 <- read.table(file="/Bioinformatics/Users/zfu/2014_BP_TB_Paper/th1_th17_signiture.csv", head=TRUE, sep=",", row.names=1)
x1 <- as.matrix(x1)
attributes9x1)
attributes(x1)
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/Th1_Th17_Significant.pdf")
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "row", density.info="none", trace="none", cexCol=0.3, labRow=NA, labCol=NA, margin=c(12, 12))
dev.off()
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/Th1_Th17_Significant.pdf")
x1
scale(x1)
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "row", density.info="none", trace="none", cexCol=0.3, labRow=NA, labCol=NA, margin=c(12, 12), key=T, keysize=1)
dev.off()
history(-25)
history(25)
x1 <- read.table(file="/Bioinformatics/Users/zfu/2014_BP_TB_Paper/th1_th17_signiture_data.csv", head=TRUE, sep=",", row.names=1)
x1 <- as.matrix(x1)
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/Th1_Th17_Significant.pdf")
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "row", density.info="none", trace="none", cexCol=0.3, labRow=NA, labCol=NA, margin=c(12, 12))
dev.off()
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/Th1_Th17_Significant.pdf")
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "none", density.info="none", trace="none", cexCol=0.3, labRow=NA, labCol=NA, margin=c(12, 12))
dev.off()
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/Th1_Th17_Significant.pdf")
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "none", density.info="none", trace="none", cexCol=0.3, labRow=NA, margin=c(12, 12))
dev.off()
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/Th1_Th17_Significant.pdf")
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "none", density.info="none", trace="none", cexCol=0.3, labRow=NA, margin=c(12, 12), labCol=NA)
x <- scale(x, center = FALSE)
hmap(x, labRow = FALSE, method = "OLO")
hmap(x, labRow = FALSE, method = "OLO", col=diverge_hcl(100), range=c(-3.5,3.5), colorkey=TRUE)
hmap(x, labRow = FALSE, method = "OLO", col = c("yellow", "blue"))
x <- read.csv(file="/Bioinformatics/Users/zfu/2014_BP_TB_Paper/rank_log2.csv", head=TRUE,sep=",")
x
attributes(x)
x <- as.matrix(x)
x
attributes(x)
x[1:10,]
?read.csv
x <- read.table(file="/Bioinformatics/Users/zfu/2014_BP_TB_Paper/rank_log2.csv", head=TRUE, sep=",", row.names=1)
x
x <- data.matrix(x)
x
attributes(x)
x[1:10,]
library(seritation)
library(seriation)
o <- c(seriate(dist(x), method ="OLO"),seriate(dist(t(x)), method = "OLO"))
o
history
history(100)
o1 <- seriate(dist(x), method = "OLO")
o2 <- seriate(dist(t(x)), method = "OLO")
o1
desribe(o1)
attributes(o1)
attributes(o2)
o1[1]
o1[[1]]
o1[[1]][1]
o1[[1]][[1]]
attributes(o1[[1]][[1]])
head(get_order(o1))
order1 <- get_order(o1)
order2 <- get_order(o2)
order2
x
attributes(x)
clustered_data <- x[order1,order2]
clustered_data
clustered_data[1:2,]
ls()
history(100)
> pdf("aa.pdf")
> heatmap.2(clustered_data, col=my_palette, scale="none", Colv = NULL, dendrogram = "row", key=T, keysize = 1.5, density.info="none", trace="none",cexCol=0.9, labRow=NA)
> dev.off()
null device
1
> heatmap.2(clustered_data, col=my_palette, scale="none", dendrogram = "column", key=T, keysize=1.5, density.info="none", trace="none",cexCol=0.5, labRow=NA)
cc = c(rep("blue",10),rep("brown",11),rep("cyan",11),rep("orange",4),rep("red",15))
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "row", density.info="none", trace="none", cexCol=0.3, labRow=NA, labCol=NA, margin=c(12, 12), key=T, keysize=1)
aa_disease
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/figure3_diseaseType.pdf")
heatmap.2(clustered_data, col=my_palette, scale="none", dendrogram = "column", key=T, keysize=1, density.info="none", trace="none",cexCol=0.3, labRow=NA, ColSideColors=aa_disease, margin=c(12, 12), labCol=NA)
dev.off()
aa_disease
colnames(clustered_data)
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/test.pdf")
heatmap.2(clustered_data, col=my_palette, scale="none", dendrogram = "column", key=T, keysize=1, density.info="none", trace="none",cexCol=0.3, labRow=NA, ColSideColors=aa_disease, margin=c(12, 12))
dev.off()
x1 <- read.table(file="/Bioinformatics/Users/zfu/2014_BP_TB_Paper/th1_th17_signiture.csv", head=TRUE, sep=",", row.names=1)
x1 <- as.matrix(x1)
attributes9x1)
attributes(x1)
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/Th1_Th17_Significant.pdf")
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "row", density.info="none", trace="none", cexCol=0.3, labRow=NA, labCol=NA, margin=c(12, 12))
dev.off()
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/Th1_Th17_Significant.pdf")
x1
scale(x1)
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "row", density.info="none", trace="none", cexCol=0.3, labRow=NA, labCol=NA, margin=c(12, 12), key=T, keysize=1)
dev.off()
history(-25)
history(25)
x1 <- read.table(file="/Bioinformatics/Users/zfu/2014_BP_TB_Paper/th1_th17_signiture_data.csv", head=TRUE, sep=",", row.names=1)
x1 <- as.matrix(x1)
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/Th1_Th17_Significant.pdf")
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "row", density.info="none", trace="none", cexCol=0.3, labRow=NA, labCol=NA, margin=c(12, 12))
dev.off()
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/Th1_Th17_Significant.pdf")
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "none", density.info="none", trace="none", cexCol=0.3, labRow=NA, labCol=NA, margin=c(12, 12))
dev.off()
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/Th1_Th17_Significant.pdf")
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "none", density.info="none", trace="none", cexCol=0.3, labRow=NA, margin=c(12, 12))
dev.off()
pdf("/Bioinformatics/Users/zfu/2014_BP_TB_Paper/Th1_Th17_Significant.pdf")
heatmap.2(scale(x1), col=my_palette, scale="none", Colv=NULL, dendrogram = "none", density.info="none", trace="none", cexCol=0.3, labRow=NA, margin=c(12, 12), labCol=NA)
Monday, March 10, 2014
2014-03-10
New mapping methods
HLA pipeline 5.0.3
HLA pipeline 5.0.4
HLA pipeline 5.0.5
HLA pipeline 5.0.6
HLA pipeline 5.0.7
HLA pipeline 5.0.8
HLA pipeline 5.0.9
Old mapping methods
HLA pipeline 5.0.10
HLA pipeline 5.0.11
HLA pipeline 5.0.12
HLA pipeline 5.0.13
HLA pipeline 5.0.14
HLA pipeline 5.0.15
HLA pipeline 5.0.16
Runs that delete all SAM files are labeled: HLA pipeline 5.0.X
Runs that keep all SAM files are labeled: HLA pipeline 5.0.X.1
Pipeline 5.0
Class 1: 175bp alignment
Class 2: 200bp alignment
New mapping methods
HLA pipeline 5.0.3
HLA pipeline 5.0.4
HLA pipeline 5.0.5
HLA pipeline 5.0.6
HLA pipeline 5.0.7
HLA pipeline 5.0.8
HLA pipeline 5.0.9
Old mapping methods
HLA pipeline 5.0.10
HLA pipeline 5.0.11
HLA pipeline 5.0.12
HLA pipeline 5.0.13
HLA pipeline 5.0.14
HLA pipeline 5.0.15
HLA pipeline 5.0.16
Runs that delete all SAM files are labeled: HLA pipeline 5.0.X
Runs that keep all SAM files are labeled: HLA pipeline 5.0.X.1
Pipeline 5.0
Class 1: 175bp alignment
Class 2: 200bp alignment
Friday, March 7, 2014
Thursday, March 6, 2014
2014-03-05
********************************
Correct One
********************************
10662 rows in set (4 hours 4 min 7.70 sec)
mysql> select Table4_20140226.PubMed_ID from Table4_20140226 LEFT JOIN pubmed_information ON Table4_20140226.PubMed_ID = pubmed_information.PubMed_ID WHERE pubmed_information.PubMed_ID IS NULL;
360246 rows in set (4 hours 56 min 4.57 sec)
mysql> select PubMed_ID from Table4_20140226 INNER JOIN pubmed_information USING (PubMed_ID);
********************************
Correct One
********************************
10662 rows in set (4 hours 4 min 7.70 sec)
mysql> select Table4_20140226.PubMed_ID from Table4_20140226 LEFT JOIN pubmed_information ON Table4_20140226.PubMed_ID = pubmed_information.PubMed_ID WHERE pubmed_information.PubMed_ID IS NULL;
360246 rows in set (4 hours 56 min 4.57 sec)
mysql> select PubMed_ID from Table4_20140226 INNER JOIN pubmed_information USING (PubMed_ID);
Tuesday, March 4, 2014
2014-03-04
MySQL dataset difference
MySQL dataset difference
SELECT *
FROM MyTableA
WHERE imageURL NOT IN (SELECT imageURL FROM MyTableB)
SELECT a.id FROM a LEFT JOIN b ON a.id = b.id WHERE b.id IS NULL
SELECT b.id FROM b LEFT JOIN a ON b.id = a.id WHERE a.id IS NULL
You can also use a left outer join, as in the two queries above: the first returns the rows that exist in table a but not in table b, and the second returns the rows that exist in table b but not in table a.
select Table4_20140226.PubMed_ID from Table4_20140226 LEFT JOIN pubmed_information ON Table4_20140226.PubMed_ID = pubmed_information.PubMed_ID WHERE pubmed_information.PubMed_ID IS NULL;
SELECT DISTINCT value FROM table_a
INNER JOIN table_b
USING (value);
+-------+
| value |
+-------+
| B |
+-------+
SELECT DISTINCT value FROM table_a
WHERE (value) IN
(SELECT value FROM table_b);
+-------+
| value |
+-------+
| B |
+-------+
Subscribe to:
Comments (Atom)