#!/bin/bash
#**************************************************************************
#   Copyright (C) 2007 by Universidad Nacional de Colombia                *
#   http://www.unal.edu.co                                                *
#                                                                         *
#   This program is free software; you can redistribute it and/or modify  *
#   it under the terms of the GNU General Public License as published by  *
#   the Free Software Foundation; either version 2 of the License, or     *
#   (at your option) any later version.                                   *
#                                                                         *
#   This program is distributed in the hope that it will be useful,       *
#   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
#   GNU General Public License for more details.                          *
#                                                                         *
#   You should have received a copy of the GNU General Public License     *
#   along with this program; if not, write to the                         *
#   Free Software Foundation, Inc.,                                       *
#   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
#**************************************************************************

#
# dendrogram.sh - draw a dendrogram (PNG) from a TARIS similarity matrix.
#
# Usage: dendrogram.sh INPUT_FILE [OUPUT_FILE] [CLUSTERING_METHOD]
# Run without arguments to print the full help text.
#

#######################################
# Print the program banner.
# Outputs: five banner lines on stdout.
#######################################
header() {
  printf '%s\n' \
    'TARIS-Dendrogram (Apr 4 2008) ' \
    ' ' \
    'Authors: Nestor F. Aguirre, Ray M. Marin and Edgar E. Daza C. ' \
    'Universidad Nacional de Colombia ' \
    ' '
}

#######################################
# Print the banner followed by the usage/help text.
# NOTE: this intentionally keeps the original function name, which shadows
# the bash builtin `help` inside this script.
# Outputs: help text on stdout.
#######################################
help() {
  header
  # Single-quoted so that `$` and `"` in the examples are emitted literally.
  printf '%s\n' \
    'Build dendrograms, loading a similarity matrix previously ' \
    'generated with TARIS-Matrices. The output file is an image ' \
    'in PNG format ' \
    ' ' \
    'Depends: ' \
    ' 1) R ( >= 2.4) ' \
    ' The R Project for Statistical Computing ' \
    ' http://www.r-project.org ' \
    ' 2) ade4 ( >= 1.4 ) ' \
    ' Analysis of Ecological Data : ' \
    ' Exploratory and Euclidean methods in Environmental sciences ' \
    ' http://cran.r-project.org/web/packages/ade4/index.html ' \
    ' ' \
    'For example: ' \
    ' ' \
    '$ find . -name "*.cube" > list ' \
    '$ TARIS_Matrices -i list -m similarity -o matrix.dat ' \
    '$ dendrogram.sh matrix.dat output.png ' \
    ' ' \
    'Synopsis: ' \
    ' dendrogram.sh INPUT_FILE [OUPUT_FILE] [CLUSTERING_METHOD] ' \
    ' ' \
    'Description: ' \
    ' INPUT_FILE: Text file that contains the simmilarity matrix ' \
    ' generated by TARIS. ' \
    ' OUPUT_FILE: Name of the PNG file with the resulting ' \
    ' dendrogram. Default = output.png ' \
    ' CLUSTERING_METHOD: The clustering method to be used for the ' \
    ' dendrogram construction. The available ' \
    ' clustering methods are: ward, single, complete, ' \
    ' average, mcquitty, median or centroid. ' \
    ' default = average ' \
    ' '
}

#######################################
# Convert a TARIS similarity matrix into a dissimilarity table and let R
# cluster it and plot the dendrogram as a PNG.
#
# Arguments:
#   $1 - path of the similarity matrix text file
#   $2 - path of the PNG to write (default: output.png)
#   $3 - hclust() method name (default: average)
# Outputs:
#   banner and R session transcript on stdout, diagnostics on stderr
# Returns:
#   0 on success; non-zero if the input is unreadable, gawk or R is not
#   installed, the table cannot be built, or R fails.
#######################################
run() {
  header

  local input_file=${1:-}
  local output_file=${2:-output.png}
  local method=${3:-average}
  local tool

  if [[ ! -f "$input_file" || ! -r "$input_file" ]]; then
    printf 'dendrogram.sh: cannot read input file: %s\n' "$input_file" >&2
    return 1
  fi

  for tool in gawk R; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      printf 'dendrogram.sh: required program not found: %s\n' "$tool" >&2
      return 1
    fi
  done

  # The subshell scopes the EXIT trap, so the scratch directory is removed
  # on every exit path without touching traps of the calling shell.
  (
    workdir=$(mktemp -d "${TMPDIR:-/tmp}/taris-dendrogram.XXXXXX") || exit 1
    trap 'rm -rf -- "$workdir"' EXIT
    csv="$workdir/dissimilarity.csv"

    # Only rows whose second field is ":" carry matrix data (label : v1 v2 ...).
    # First pass emits the column-label row, second pass emits one row per
    # label with every similarity s turned into the dissimilarity 100 - s.
    # The input is fed through stdin so that an unusual file name can never
    # be mistaken for a gawk option or a var=value assignment.
    {
      printf 'labels ' &&
        gawk '$2 == ":" { printf("%-8s ", $1) } END { printf("\n") }' \
          <"$input_file" &&
        gawk '$2 == ":" {
                printf("%-8s ", $1)
                for (i = 3; i <= NF; i++) printf("%7.6f ", 100.0 - $i)
                print ""
              }' <"$input_file"
    } >"$csv" || {
      printf 'dendrogram.sh: failed to build the dissimilarity table from: %s\n' \
        "$input_file" >&2
      exit 1
    }

    # Paths and the method are handed to R through the environment instead of
    # being pasted into R source, so quotes or backslashes in them are harmless.
    # NOTE(review): par(cex=0.1) runs before png() opens the output device, so
    # it presumably does not affect the PNG; the original statement order is
    # kept to preserve the rendered output -- confirm the intent.
    TARIS_DENDROGRAM_CSV="$csv" \
      TARIS_DENDROGRAM_PNG="$output_file" \
      TARIS_DENDROGRAM_METHOD="$method" \
      R --no-save <<'EOF'
require(ade4)
require(foreign)
data <- as.dist(read.table(Sys.getenv("TARIS_DENDROGRAM_CSV"), header=TRUE, row.names="labels"))
dend <- hclust(data, method=Sys.getenv("TARIS_DENDROGRAM_METHOD"))
par(cex=0.1)
dend <- as.dendrogram(dend)
png(Sys.getenv("TARIS_DENDROGRAM_PNG"), width=800, height=600)
plot(dend, horiz=TRUE, xlab="Similarity", frame.plot=FALSE, axes=TRUE)
dev.off()
EOF
  )
}

#######################################
# Entry point: show the help when no input file is given, otherwise run
# with "output.png" / "average" filled in for omitted or empty arguments.
# Arguments: the script's command-line arguments.
# Returns:   status of help or run.
#######################################
main() {
  if [[ -z "${1:-}" ]]; then
    help
    return
  fi
  run "$1" "${2:-output.png}" "${3:-average}"
}

main "$@"