#!/usr/bin/perl
# ---------------------------------------------------------------
# Perl code to convert delimited email data to TimeSearcher format
# ---------------------------------------------------------------
# To execute the script, type
#   perl p_codeTSfmt.pl <input-file> <output-file>
#
# Input file data format supported: time-katy_iv.txt
# (Available at: http://iv.slis.indiana.edu/lm/lm-time-series.html)
# Each record is "time|sender|subject"; the first line of the file
# is a header and is ignored.
#
# Output file generated can be directly used for visualization in
# time-searcher application
#
# ---------------------------------------------------------------

use strict;
use warnings;

# Run only when executed as a script, so the helpers below can be
# loaded (require/do) without triggering any file I/O.
main(@ARGV) unless caller;

# ---------------------------------------------------------------
# parse_record($line)
#   Splits one "time|sender|subject" record.
#   Returns (date_label, sender_name), or an empty list when the
#   line has no sender field (blank or truncated line).
#   date_label is the third whitespace-separated token of the time
#   field followed by the second one.
# ---------------------------------------------------------------
sub parse_record {
    my ($line) = @_;
    return unless defined $line;

    my ($time_log, $name) = split /\|/, $line;
    return unless defined $time_log && defined $name;

    # Single-character whitespace split: empty leading fields are kept,
    # which is what the token positions below rely on.
    my @time_parts = split /\s/, $time_log;
    my $date = join '', map { defined $_ ? $_ : '' } @time_parts[ 2, 1 ];

    # Drop the "<address>" part (everything from the last '<' on),
    # then trim surrounding whitespace.  All trailing whitespace is
    # removed, not just one character, so "Bob " and "Bob  " (or a
    # CRLF line ending) do not become distinct senders.
    $name =~ s/(.*)<.*/$1/;
    $name =~ s/^\s+//;
    $name =~ s/\s+\z//;

    return ($date, $name);
}

# ---------------------------------------------------------------
# tally_records(@lines)
#   Counts messages per sender per date label.
#   Returns three references:
#     \@dates    - unique date labels in first-seen order
#     \@names    - unique sender names, sorted for stable output
#     \%count_of - $count_of{$name}{$date} = number of messages
# ---------------------------------------------------------------
sub tally_records {
    my @lines = @_;

    my @dates;
    my %seen_date;
    my %seen_name;
    my %count_of;

    for my $line (@lines) {
        my @record = parse_record($line);
        next unless @record;
        my ($date, $name) = @record;

        # Global de-duplication: a date that re-appears later in the
        # input must not produce a second time-point column.
        push @dates, $date unless $seen_date{$date}++;
        $seen_name{$name} = 1;
        $count_of{$name}{$date}++;
    }

    my @names = sort keys %seen_name;
    return (\@dates, \@names, \%count_of);
}

# ---------------------------------------------------------------
# format_tqd(\@dates, \@names, \%count_of)
#   Returns the complete .tqd file content as one string: the fixed
#   header, the time-point labels, then one "name,c1,c2,...,cn" row
#   per sender (0 where the sender has no message for a date).
# ---------------------------------------------------------------
sub format_tqd {
    my ($dates, $names, $count_of) = @_;

    my @out = (
        "# title\n",
        "Enter time searcher title here ...\n",
        "# static attributes\nSender,String\n",
        "# Dynamic atts\nemailNumber,Int\n",
        "# of time points=n\n",
        scalar(@{$dates}) . "\n",
        "# of records k\n",
        scalar(@{$names}) . "\n",
        "# time point labels\n",
        join(',', @{$dates}) . "\n",
        "#stat1, dynamic \@ t1, dynamic \@t2, ....,dynamic \@tn\n",
        "# each dynamic are the v1, v2, ..vk \@ time tj.\n",
    );

    for my $name (@{$names}) {
        my @counts = map { $count_of->{$name}{$_} || 0 } @{$dates};
        push @out, join(',', $name, @counts) . "\n";
    }

    return join '', @out;
}

# ---------------------------------------------------------------
# main($in_path, $out_path)
#   Reads the delimited email log, skips its header line, and writes
#   the TimeSearcher (.tqd) file.  Dies with a message on a missing
#   argument or on any open/close failure.
# ---------------------------------------------------------------
sub main {
    my ($in_path, $out_path) = @_;

    die "Usage: $0 <input-file> <output-file>\n"
        unless defined $in_path && defined $out_path;

    # Read the input file
    open my $in, '<', $in_path
        or die "Cannot open '$in_path' for reading: $!\n";
    my @lines = <$in>;
    close $in;

    # Remove the header line
    shift @lines;

    my ($dates, $names, $count_of) = tally_records(@lines);

    # Generate the output file in .tqd format
    open my $out, '>', $out_path
        or die "Cannot open '$out_path' for writing: $!\n";
    print {$out} format_tqd($dates, $names, $count_of);
    close $out
        or die "Cannot close '$out_path': $!\n";

    return;
}