Friday 8 February 2013


Read my project "Personalisation using Web Mining techniques" here.


C# Code Appendix for pre-processor application used in this project:

//Designed with Visual Studio 2008
using System;
using System.Collections.Generic;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.IO;
using System.Text;

// This line imports the regular expressions class library
using System.Text.RegularExpressions;

namespace LogsPreprocessor
{
    /// <summary>
    /// Application for loading, pre-processing and saving Apache server log files.
    /// The contents of a .txt file are loaded and processed using regular expressions.
    /// Noise data in the files can be removed with the Clean function, and irrelevant
    /// requests can be removed with the Filter function.
    /// </summary>
    public class FormLogs : System.Windows.Forms.Form
    {
        // Required designer variable.
        // Starts as null; Dispose(bool) only disposes it when it is non-null.
        private System.ComponentModel.Container components = null;
        // Design time objects
        // All of the controls below are instantiated and configured in InitializeComponent().
        // Panel 1 ("1.Original Logs"): caption label and the text box holding the raw log text.
        private System.Windows.Forms.Label labelLogs1;
        private System.Windows.Forms.TextBox textLogs1;
        // Panel 2 ("2.Pre-processed Logs"): text box written by the Clean action and its caption.
        private System.Windows.Forms.TextBox textLogs2;
        private System.Windows.Forms.Label labelLogs2;
        // Action buttons; each one is wired to a Click handler in InitializeComponent().
        private System.Windows.Forms.Button cleanBtn;
        private Button saveBtn;
        private Button deleteBtn1;
        private Button compareBtn;
        // Decorative picture; its background image is loaded from the form's resources.
        private PictureBox magnifPic;
        private Button filterBtn;
        // File-open dialog instance created in InitializeComponent().
        private OpenFileDialog openFileDialog1;
        private Button loadBtn;
        private Button deleteBtn2;
        // Static banner label ("University of Salford 2011").
        private Label labelSalford;
        // Panel 3 ("3.Pre-processed Logs+"): text box appended to by the Filter action and its caption.
        private TextBox textLogs3;
        private Label labelLogs3;
        private Button deleteBtn3;
        // File-save dialog instance created in InitializeComponent(), where its Filter is set.
        private SaveFileDialog saveFileDialog1;

        public FormLogs()
        {
            // Required for Windows Form Designer support
            InitializeComponent();
        }
        // Cleans up any resources being used.
        protected override void Dispose(bool disposing)
        {
            if (disposing)
            {
                if (components != null)
                {
                    components.Dispose();
                }
            }
            base.Dispose(disposing);
        }

        #region Windows Form Designer generated code
        /// <summary>
        /// Creates every control on the form, sets its appearance and layout properties,
        /// attaches the button Click handlers and finally adds the controls to the form.
        /// </summary>
        // NOTE(review): presumably this method is maintained by the Windows Forms designer
        // (see the region name), so hand edits may be overwritten - confirm before changing it.
        private void InitializeComponent()
        {
            // Resource manager used below to load the magnifPic background image.
            System.ComponentModel.ComponentResourceManager resources = new System.ComponentModel.ComponentResourceManager(typeof(FormLogs));
            this.labelLogs1 = new System.Windows.Forms.Label();
            this.textLogs1 = new System.Windows.Forms.TextBox();
            this.textLogs2 = new System.Windows.Forms.TextBox();
            this.labelLogs2 = new System.Windows.Forms.Label();
            this.cleanBtn = new System.Windows.Forms.Button();
            this.saveBtn = new System.Windows.Forms.Button();
            this.saveFileDialog1 = new System.Windows.Forms.SaveFileDialog();
            this.deleteBtn1 = new System.Windows.Forms.Button();
            this.compareBtn = new System.Windows.Forms.Button();
            this.magnifPic = new System.Windows.Forms.PictureBox();
            this.filterBtn = new System.Windows.Forms.Button();
            this.openFileDialog1 = new System.Windows.Forms.OpenFileDialog();
            this.loadBtn = new System.Windows.Forms.Button();
            this.deleteBtn2 = new System.Windows.Forms.Button();
            this.labelSalford = new System.Windows.Forms.Label();
            this.textLogs3 = new System.Windows.Forms.TextBox();
            this.labelLogs3 = new System.Windows.Forms.Label();
            this.deleteBtn3 = new System.Windows.Forms.Button();
            // Layout is suspended while the controls are configured and resumed at the end.
            ((System.ComponentModel.ISupportInitialize)(this.magnifPic)).BeginInit();
            this.SuspendLayout();
            //
            // labelLogs1
            //
            this.labelLogs1.AutoSize = true;
            this.labelLogs1.Font = new System.Drawing.Font("Microsoft Sans Serif", 10F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            this.labelLogs1.Location = new System.Drawing.Point(12, 50);
            this.labelLogs1.Name = "labelLogs1";
            this.labelLogs1.Size = new System.Drawing.Size(119, 17);
            this.labelLogs1.TabIndex = 7;
            this.labelLogs1.Text = "1.Original Logs";
            //
            // textLogs1
            //
            this.textLogs1.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
                        | System.Windows.Forms.AnchorStyles.Right)));
            this.textLogs1.BackColor = System.Drawing.SystemColors.MenuText;
            this.textLogs1.Font = new System.Drawing.Font("Courier New", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            this.textLogs1.ForeColor = System.Drawing.SystemColors.Info;
            this.textLogs1.Location = new System.Drawing.Point(3, 77);
            // MaxLength is raised to int.MaxValue on all three log text boxes.
            this.textLogs1.MaxLength = 2147483647;
            this.textLogs1.Multiline = true;
            this.textLogs1.Name = "textLogs1";
            this.textLogs1.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
            this.textLogs1.Size = new System.Drawing.Size(597, 77);
            this.textLogs1.TabIndex = 8;
            this.textLogs1.Text = "Insert apache logs here or load a file";
            //
            // textLogs2
            //
            this.textLogs2.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
                        | System.Windows.Forms.AnchorStyles.Left)
                        | System.Windows.Forms.AnchorStyles.Right)));
            this.textLogs2.BackColor = System.Drawing.SystemColors.MenuText;
            this.textLogs2.Cursor = System.Windows.Forms.Cursors.Arrow;
            this.textLogs2.Font = new System.Drawing.Font("Courier New", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            this.textLogs2.ForeColor = System.Drawing.SystemColors.Info;
            this.textLogs2.ImeMode = System.Windows.Forms.ImeMode.On;
            this.textLogs2.Location = new System.Drawing.Point(3, 194);
            this.textLogs2.MaxLength = 2147483647;
            this.textLogs2.Multiline = true;
            this.textLogs2.Name = "textLogs2";
            this.textLogs2.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
            this.textLogs2.Size = new System.Drawing.Size(597, 137);
            this.textLogs2.TabIndex = 16;
            this.textLogs2.Text = "\r\n\r\n\r\n";
            //
            // labelLogs2
            //
            this.labelLogs2.AutoSize = true;
            this.labelLogs2.Font = new System.Drawing.Font("Microsoft Sans Serif", 10F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            this.labelLogs2.Location = new System.Drawing.Point(12, 167);
            this.labelLogs2.Name = "labelLogs2";
            this.labelLogs2.Size = new System.Drawing.Size(168, 17);
            this.labelLogs2.TabIndex = 15;
            this.labelLogs2.Text = "2.Pre-processed Logs";
            //
            // cleanBtn
            //
            this.cleanBtn.BackColor = System.Drawing.SystemColors.InactiveBorder;
            this.cleanBtn.Cursor = System.Windows.Forms.Cursors.Hand;
            this.cleanBtn.ForeColor = System.Drawing.Color.DarkRed;
            this.cleanBtn.Location = new System.Drawing.Point(221, 160);
            this.cleanBtn.Name = "cleanBtn";
            this.cleanBtn.Size = new System.Drawing.Size(125, 31);
            this.cleanBtn.TabIndex = 21;
            // NOTE(review): this Tag value is not read anywhere in the visible code - looks like leftover test data; confirm.
            this.cleanBtn.Tag = "kkjkj";
            this.cleanBtn.Text = "Clean";
            this.cleanBtn.UseVisualStyleBackColor = false;
            // Handler name differs from the control name: cleanBtn -> btnClean_Click.
            this.cleanBtn.Click += new System.EventHandler(this.btnClean_Click);
            //
            // saveBtn
            //
            this.saveBtn.BackColor = System.Drawing.SystemColors.InactiveBorder;
            this.saveBtn.Cursor = System.Windows.Forms.Cursors.Hand;
            this.saveBtn.ForeColor = System.Drawing.Color.DarkRed;
            this.saveBtn.Location = new System.Drawing.Point(351, 337);
            this.saveBtn.Name = "saveBtn";
            this.saveBtn.Size = new System.Drawing.Size(130, 31);
            this.saveBtn.TabIndex = 26;
            this.saveBtn.Text = "Save Logs3";
            this.saveBtn.UseVisualStyleBackColor = false;
            this.saveBtn.Click += new System.EventHandler(this.saveBtn_Click);
            //
            // saveFileDialog1
            //
            this.saveFileDialog1.Filter = "Text Files (*.txt)|*.txt|All Files (*.*)|*.*";
            //
            // deleteBtn1
            //
            this.deleteBtn1.BackColor = System.Drawing.SystemColors.InactiveBorder;
            this.deleteBtn1.Cursor = System.Windows.Forms.Cursors.Hand;
            this.deleteBtn1.ForeColor = System.Drawing.Color.DarkRed;
            this.deleteBtn1.Location = new System.Drawing.Point(487, 41);
            this.deleteBtn1.Name = "deleteBtn1";
            this.deleteBtn1.Size = new System.Drawing.Size(113, 30);
            this.deleteBtn1.TabIndex = 30;
            this.deleteBtn1.Text = "Delete Logs1";
            this.deleteBtn1.UseVisualStyleBackColor = false;
            this.deleteBtn1.Click += new System.EventHandler(this.deleteBtn1_Click);
            //
            // compareBtn
            //
            this.compareBtn.BackColor = System.Drawing.SystemColors.InactiveBorder;
            this.compareBtn.Cursor = System.Windows.Forms.Cursors.Hand;
            this.compareBtn.ForeColor = System.Drawing.Color.Maroon;
            this.compareBtn.Location = new System.Drawing.Point(221, 41);
            this.compareBtn.Name = "compareBtn";
            this.compareBtn.Size = new System.Drawing.Size(125, 30);
            this.compareBtn.TabIndex = 31;
            this.compareBtn.Text = "Compare Lines";
            this.compareBtn.UseVisualStyleBackColor = false;
            // Handler name differs from the control name: compareBtn -> countBtn_Click.
            this.compareBtn.Click += new System.EventHandler(this.countBtn_Click);
            //
            // magnifPic
            //
            this.magnifPic.BackgroundImage = ((System.Drawing.Image)(resources.GetObject("magnifPic.BackgroundImage")));
            this.magnifPic.BackgroundImageLayout = System.Windows.Forms.ImageLayout.Stretch;
            this.magnifPic.Location = new System.Drawing.Point(157, 33);
            this.magnifPic.Name = "magnifPic";
            this.magnifPic.Size = new System.Drawing.Size(43, 38);
            this.magnifPic.TabIndex = 32;
            this.magnifPic.TabStop = false;
            //
            // filterBtn
            //
            this.filterBtn.BackColor = System.Drawing.SystemColors.InactiveBorder;
            this.filterBtn.Cursor = System.Windows.Forms.Cursors.Hand;
            this.filterBtn.ForeColor = System.Drawing.Color.Maroon;
            this.filterBtn.Location = new System.Drawing.Point(221, 337);
            this.filterBtn.Name = "filterBtn";
            this.filterBtn.Size = new System.Drawing.Size(125, 31);
            this.filterBtn.TabIndex = 33;
            this.filterBtn.Text = "Filter";
            this.filterBtn.UseVisualStyleBackColor = false;
            this.filterBtn.Click += new System.EventHandler(this.filterBtn_Click);
            //
            // loadBtn
            //
            this.loadBtn.BackColor = System.Drawing.SystemColors.InactiveBorder;
            this.loadBtn.Cursor = System.Windows.Forms.Cursors.Hand;
            this.loadBtn.ForeColor = System.Drawing.Color.Maroon;
            this.loadBtn.Location = new System.Drawing.Point(351, 41);
            this.loadBtn.Name = "loadBtn";
            this.loadBtn.Size = new System.Drawing.Size(130, 30);
            this.loadBtn.TabIndex = 34;
            this.loadBtn.Text = "Load Logs File";
            this.loadBtn.UseVisualStyleBackColor = false;
            // Handler name differs from the control name: loadBtn -> openBtn_Click_1.
            this.loadBtn.Click += new System.EventHandler(this.openBtn_Click_1);
            //
            // deleteBtn2
            //
            this.deleteBtn2.BackColor = System.Drawing.SystemColors.InactiveBorder;
            this.deleteBtn2.Cursor = System.Windows.Forms.Cursors.Hand;
            this.deleteBtn2.ForeColor = System.Drawing.Color.DarkRed;
            this.deleteBtn2.Location = new System.Drawing.Point(487, 157);
            this.deleteBtn2.Name = "deleteBtn2";
            this.deleteBtn2.Size = new System.Drawing.Size(113, 31);
            this.deleteBtn2.TabIndex = 37;
            this.deleteBtn2.Text = "Delete Logs2";
            this.deleteBtn2.UseVisualStyleBackColor = false;
            this.deleteBtn2.Click += new System.EventHandler(this.deleteBtn2_Click);
           //
            // labelSalford
            //
            this.labelSalford.AutoSize = true;
            this.labelSalford.ForeColor = System.Drawing.Color.Green;
            this.labelSalford.Location = new System.Drawing.Point(218, 9);
            this.labelSalford.Name = "labelSalford";
            this.labelSalford.Size = new System.Drawing.Size(171, 17);
            this.labelSalford.TabIndex = 38;
            this.labelSalford.Text = "University of Salford 2011";
            //
            // textLogs3
            //
            this.textLogs3.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
                        | System.Windows.Forms.AnchorStyles.Left)
                        | System.Windows.Forms.AnchorStyles.Right)));
            this.textLogs3.BackColor = System.Drawing.SystemColors.MenuText;
            this.textLogs3.Cursor = System.Windows.Forms.Cursors.Arrow;
            this.textLogs3.Font = new System.Drawing.Font("Courier New", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            this.textLogs3.ForeColor = System.Drawing.SystemColors.Info;
            this.textLogs3.ImeMode = System.Windows.Forms.ImeMode.On;
            this.textLogs3.Location = new System.Drawing.Point(3, 374);
            this.textLogs3.MaxLength = 2147483647;
            this.textLogs3.Multiline = true;
            this.textLogs3.Name = "textLogs3";
            this.textLogs3.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
            this.textLogs3.Size = new System.Drawing.Size(597, 114);
            this.textLogs3.TabIndex = 39;
            //
            // labelLogs3
            //
            this.labelLogs3.AutoSize = true;
            this.labelLogs3.Font = new System.Drawing.Font("Microsoft Sans Serif", 10F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            this.labelLogs3.Location = new System.Drawing.Point(12, 351);
            this.labelLogs3.Name = "labelLogs3";
            this.labelLogs3.Size = new System.Drawing.Size(177, 17);
            this.labelLogs3.TabIndex = 40;
            this.labelLogs3.Text = "3.Pre-processed Logs+";
            //
            // deleteBtn3
            //
            this.deleteBtn3.BackColor = System.Drawing.SystemColors.InactiveBorder;
            this.deleteBtn3.Cursor = System.Windows.Forms.Cursors.Hand;
            this.deleteBtn3.ForeColor = System.Drawing.Color.DarkRed;
            this.deleteBtn3.Location = new System.Drawing.Point(487, 337);
            this.deleteBtn3.Name = "deleteBtn3";
            this.deleteBtn3.Size = new System.Drawing.Size(113, 31);
            this.deleteBtn3.TabIndex = 41;
            this.deleteBtn3.Text = "Delete Logs3";
            this.deleteBtn3.UseVisualStyleBackColor = false;
            this.deleteBtn3.Click += new System.EventHandler(this.deleteBtn3_Click);
            //
            // FormLogs
            //
            this.AutoScaleBaseSize = new System.Drawing.Size(6, 16);
            this.AutoSizeMode = System.Windows.Forms.AutoSizeMode.GrowAndShrink;
            this.BackColor = System.Drawing.SystemColors.ActiveBorder;
            this.ClientSize = new System.Drawing.Size(604, 500);
            this.Controls.Add(this.deleteBtn3);
            this.Controls.Add(this.labelLogs3);
            this.Controls.Add(this.textLogs3);
            this.Controls.Add(this.labelSalford);
            this.Controls.Add(this.deleteBtn2);
            this.Controls.Add(this.loadBtn);
            this.Controls.Add(this.filterBtn);
            this.Controls.Add(this.magnifPic);
            this.Controls.Add(this.compareBtn);
            this.Controls.Add(this.deleteBtn1);
            this.Controls.Add(this.saveBtn);
            this.Controls.Add(this.textLogs2);
            this.Controls.Add(this.textLogs1);
            this.Controls.Add(this.labelLogs2);
            this.Controls.Add(this.labelLogs1);
            this.Controls.Add(this.cleanBtn);
            this.Font = new System.Drawing.Font("Microsoft Sans Serif", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            this.MinimumSize = new System.Drawing.Size(500, 450);
            this.Name = "FormLogs";
            this.Text = "Logs Pre-Processor Tool ";
            // The form is configured to stay above other windows.
            this.TopMost = true;
            ((System.ComponentModel.ISupportInitialize)(this.magnifPic)).EndInit();
            this.ResumeLayout(false);
            this.PerformLayout();
        }
        #endregion
        // The main entry point for the application
        [STAThread]
        static void Main()
        {
            Application.Run(new FormLogs());
        }
        // Constructs a RegexOptions object so that a RegexOptions object 
        // is not needed to be used in  the Regex() constructor
        private RegexOptions getRegexOptions()
        {
            RegexOptions options = new RegexOptions();
            //Ignores letter case
            options |= RegexOptions.IgnoreCase;
            //Compiles regular expressions using an assembly
            options |= RegexOptions.Compiled;
            //\w, \d and \s match ASCII characters only,
            //and \10 is backreference 1 followed by a
            //literal 0 rather than octal escape 10.
            options |= RegexOptions.IgnorePatternWhitespace;
            return options;
        }
        //Array for storing Regex.Split results
        private void printCleanArray(string[]array)
        {
            textLogs2.Text = "";
            for (int i = 0; i < array.Length; i++)
            {
                textLogs2.AppendText(array[i] + "\r\n");
            }
        }
        private void btnClean_Click(object sender, System.EventArgs e)
        // Regex.Split used for splitting a single string into an array of strings
        // using Regex.Split.The string is cut at each point where the regex matches.  The part of
        // the string matched by the regex is thrown away.
        {
            Regex myRegex = new Regex("HTTP/*d*.*d*\"*d*d*d*d*d*d*\"*D*]*:*//*D*D*D*.*");
            printCleanArray(Regex.Split(textLogs1.Text, myRegex.ToString(), getRegexOptions()));

            //Regex.Replace used  for replacing "forum" to "forum.html" in the log files
            //for matching forum visits in filter process
             String replaceForum = "forum.html";
             Regex myRegex2 = new Regex("forum/", RegexOptions.Compiled);
             textLogs2.Text = Regex.Replace(textLogs2.Text, myRegex2.ToString(), replaceForum.ToString(), getRegexOptions());    
        }
        private void saveBtn_Click(object sender, EventArgs e)
        {
            //Opens a saveFileDialog window for saving the content of the textResults to a .txt file
            SaveFileDialog saveFileDialog1 = new SaveFileDialog();
            saveFileDialog1.Filter = "Text Files (*.txt)|*.txt|All Files (*.*)|*.*";
            saveFileDialog1.Title = "Save the new Log File";

            if (saveFileDialog1.ShowDialog() == DialogResult.OK)
            {
                using (StreamWriter sw = new StreamWriter(saveFileDialog1.FileName))
                {
                    sw.Write(textLogs3.Text);
                }
            }
        }
        private void countBtn_Click(object sender, EventArgs e)
            //Count the lines of textLogs1 and textLogs3 and displays a messagebox
            //for comparing the lines of each textbox.
        {
            {
                StringBuilder lineInfo = new StringBuilder();
                StringBuilder lineInfo2 = new StringBuilder();
                lineInfo.Append("Lines before " + textLogs1.Lines.Length.ToString() );
                lineInfo2.Append("Lines now " + textLogs3.Lines.Length.ToString() );
                MessageBox.Show( lineInfo2.ToString(),lineInfo.ToString());
            }
        }
        private void filterBtn_Click(object sender, EventArgs e)
        // This method test if strings can be matched  and gets the text of the matches,by a regex using the System.Text.RegularExpressions.Regex.Matches static method.
        {
            Regex myRegex = new Regex(".*html |.*pdf " ,RegexOptions.Compiled);
            string inputString = textLogs2.Text;
            MatchCollection myMatchCollection = myRegex.Matches(inputString);
           
            GroupCollection myGroupCollection;

            foreach (Match myMatch in myMatchCollection)
            {
                textLogs3.AppendText(myMatch.ToString() + "\r\n");
                myGroupCollection = myMatch.Groups;
        }
            }
        private void openBtn_Click_1(object sender, EventArgs e)
        {
            {
                //Opens a saveFileDialog window for saving the content of the textResults to a .txt file
                OpenFileDialog openFileDialog1 = new OpenFileDialog();
                openFileDialog1.Filter = "Text Files (*.txt)|*.txt|All Files (*.*)|*.*";
                openFileDialog1.Title = "Open a Log File";

                if (openFileDialog1.ShowDialog() == DialogResult.OK)
                {
                    this.textLogs1.Text = File.ReadAllText(openFileDialog1.FileName);
                }
            }
        }
        private void deleteBtn1_Click(object sender, EventArgs e)
        {
            // Deletes the content of the textLogs1 textbox
            textLogs1.Text = "";
        }
        private void deleteBtn2_Click(object sender, EventArgs e)
        {
            {
                // Deletes the content of the textLogs2 textbox
                textLogs2.Text = "";
            }
        }
        private void deleteBtn3_Click(object sender, EventArgs e)
        {
            {
                // Deletes the content of the textLogs3 textbox
                textLogs3.Text = "";
            }
        }
    }
}
     
Nikolas Georgiou