Read my project "Personalisation using Web Mining techniques" here.
C# code appendix for the pre-processor application used in this project:
//Designed with Visual Studio 2008
using System;
using System.Collections.Generic;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.IO;
using System.Text;
// This line imports the regular expressions class library
using System.Text.RegularExpressions;
namespace LogsPreprocessor
{
/// <summary>
/// Application for loading, pre-processing and saving Apache server log files.
/// The contents of a .txt file are loaded and processed using regular expressions.
/// Noise data in the files can be removed with the Clean function, and irrelevant
/// requests can be removed with the Filter function.
/// </summary>
public class FormLogs :
System.Windows.Forms.Form
{
// Required designer variable. Starts as null; Dispose(bool) disposes it only when it is not null.
private System.ComponentModel.Container components = null;

// Design-time objects, all instantiated in InitializeComponent().

// Captions above the three log panes, plus the university label.
private Label labelLogs1;
private Label labelLogs2;
private Label labelLogs3;
private Label labelSalford;

// Log panes: 1 = original logs, 2 = pre-processed logs, 3 = pre-processed logs after filtering.
private TextBox textLogs1;
private TextBox textLogs2;
private TextBox textLogs3;

// Action buttons.
private Button loadBtn;
private Button compareBtn;
private Button cleanBtn;
private Button filterBtn;
private Button saveBtn;
private Button deleteBtn1;
private Button deleteBtn2;
private Button deleteBtn3;

// Picture shown next to the buttons.
private PictureBox magnifPic;

// File dialogs created by InitializeComponent().
private OpenFileDialog openFileDialog1;
private SaveFileDialog saveFileDialog1;
/// <summary>
/// Creates the form and builds its user interface.
/// </summary>
public FormLogs()
{
    // Required for Windows Form Designer support: this call creates and
    // configures every control the event handlers rely on.
    this.InitializeComponent();
}
/// <summary>
/// Cleans up any resources being used by the form.
/// </summary>
/// <param name="disposing">
/// When true, the components container is disposed first (if one exists);
/// the base class is asked to dispose in either case.
/// </param>
protected override void Dispose(bool disposing)
{
    if (disposing && components != null)
    {
        components.Dispose();
    }

    base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Builds the form's user interface: instantiates every control and dialog,
/// sets their properties, hooks up the button Click handlers and adds the
/// controls to the form.
/// </summary>
/// <remarks>
/// This code lives in the "Windows Form Designer generated code" region, so it
/// is presumably maintained by the Visual Studio designer; prefer changing the
/// layout through the designer rather than by hand.
/// </remarks>
private void InitializeComponent()
{
    System.ComponentModel.ComponentResourceManager resources = new System.ComponentModel.ComponentResourceManager(typeof(FormLogs));
    this.labelLogs1 = new System.Windows.Forms.Label();
    this.textLogs1 = new System.Windows.Forms.TextBox();
    this.textLogs2 = new System.Windows.Forms.TextBox();
    this.labelLogs2 = new System.Windows.Forms.Label();
    this.cleanBtn = new System.Windows.Forms.Button();
    this.saveBtn = new System.Windows.Forms.Button();
    this.saveFileDialog1 = new System.Windows.Forms.SaveFileDialog();
    this.deleteBtn1 = new System.Windows.Forms.Button();
    this.compareBtn = new System.Windows.Forms.Button();
    this.magnifPic = new System.Windows.Forms.PictureBox();
    this.filterBtn = new System.Windows.Forms.Button();
    this.openFileDialog1 = new System.Windows.Forms.OpenFileDialog();
    this.loadBtn = new System.Windows.Forms.Button();
    this.deleteBtn2 = new System.Windows.Forms.Button();
    this.labelSalford = new System.Windows.Forms.Label();
    this.textLogs3 = new System.Windows.Forms.TextBox();
    this.labelLogs3 = new System.Windows.Forms.Label();
    this.deleteBtn3 = new System.Windows.Forms.Button();
    ((System.ComponentModel.ISupportInitialize)(this.magnifPic)).BeginInit();
    this.SuspendLayout();
    //
    // labelLogs1
    //
    this.labelLogs1.AutoSize = true;
    this.labelLogs1.Font = new System.Drawing.Font("Microsoft Sans Serif", 10F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
    this.labelLogs1.Location = new System.Drawing.Point(12, 50);
    this.labelLogs1.Name = "labelLogs1";
    this.labelLogs1.Size = new System.Drawing.Size(119, 17);
    this.labelLogs1.TabIndex = 7;
    this.labelLogs1.Text = "1.Original Logs";
    //
    // textLogs1
    //
    this.textLogs1.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
                | System.Windows.Forms.AnchorStyles.Right)));
    this.textLogs1.BackColor = System.Drawing.SystemColors.MenuText;
    this.textLogs1.Font = new System.Drawing.Font("Courier New", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
    this.textLogs1.ForeColor = System.Drawing.SystemColors.Info;
    this.textLogs1.Location = new System.Drawing.Point(3, 77);
    this.textLogs1.MaxLength = 2147483647;
    this.textLogs1.Multiline = true;
    this.textLogs1.Name = "textLogs1";
    this.textLogs1.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
    this.textLogs1.Size = new System.Drawing.Size(597, 77);
    this.textLogs1.TabIndex = 8;
    this.textLogs1.Text = "Insert apache logs here or load a file";
    //
    // textLogs2
    //
    this.textLogs2.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
                | System.Windows.Forms.AnchorStyles.Left)
                | System.Windows.Forms.AnchorStyles.Right)));
    this.textLogs2.BackColor = System.Drawing.SystemColors.MenuText;
    this.textLogs2.Cursor = System.Windows.Forms.Cursors.Arrow;
    this.textLogs2.Font = new System.Drawing.Font("Courier New", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
    this.textLogs2.ForeColor = System.Drawing.SystemColors.Info;
    this.textLogs2.ImeMode = System.Windows.Forms.ImeMode.On;
    this.textLogs2.Location = new System.Drawing.Point(3, 194);
    this.textLogs2.MaxLength = 2147483647;
    this.textLogs2.Multiline = true;
    this.textLogs2.Name = "textLogs2";
    this.textLogs2.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
    this.textLogs2.Size = new System.Drawing.Size(597, 137);
    this.textLogs2.TabIndex = 16;
    this.textLogs2.Text = "\r\n\r\n\r\n";
    //
    // labelLogs2
    //
    this.labelLogs2.AutoSize = true;
    this.labelLogs2.Font = new System.Drawing.Font("Microsoft Sans Serif", 10F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
    this.labelLogs2.Location = new System.Drawing.Point(12, 167);
    this.labelLogs2.Name = "labelLogs2";
    this.labelLogs2.Size = new System.Drawing.Size(168, 17);
    this.labelLogs2.TabIndex = 15;
    this.labelLogs2.Text = "2.Pre-processed Logs";
    //
    // cleanBtn
    //
    this.cleanBtn.BackColor = System.Drawing.SystemColors.InactiveBorder;
    this.cleanBtn.Cursor = System.Windows.Forms.Cursors.Hand;
    this.cleanBtn.ForeColor = System.Drawing.Color.DarkRed;
    this.cleanBtn.Location = new System.Drawing.Point(221, 160);
    this.cleanBtn.Name = "cleanBtn";
    this.cleanBtn.Size = new System.Drawing.Size(125, 31);
    this.cleanBtn.TabIndex = 21;
    this.cleanBtn.Tag = "kkjkj";
    this.cleanBtn.Text = "Clean";
    this.cleanBtn.UseVisualStyleBackColor = false;
    this.cleanBtn.Click += new System.EventHandler(this.btnClean_Click);
    //
    // saveBtn
    //
    this.saveBtn.BackColor = System.Drawing.SystemColors.InactiveBorder;
    this.saveBtn.Cursor = System.Windows.Forms.Cursors.Hand;
    this.saveBtn.ForeColor = System.Drawing.Color.DarkRed;
    this.saveBtn.Location = new System.Drawing.Point(351, 337);
    this.saveBtn.Name = "saveBtn";
    this.saveBtn.Size = new System.Drawing.Size(130, 31);
    this.saveBtn.TabIndex = 26;
    this.saveBtn.Text = "Save Logs3";
    this.saveBtn.UseVisualStyleBackColor = false;
    this.saveBtn.Click += new System.EventHandler(this.saveBtn_Click);
    //
    // saveFileDialog1
    //
    // The filter must be one unbroken string literal: a line break inside it does not compile.
    this.saveFileDialog1.Filter = "Text Files (*.txt)|*.txt|All Files (*.*)|*.*";
    //
    // deleteBtn1
    //
    this.deleteBtn1.BackColor = System.Drawing.SystemColors.InactiveBorder;
    this.deleteBtn1.Cursor = System.Windows.Forms.Cursors.Hand;
    this.deleteBtn1.ForeColor = System.Drawing.Color.DarkRed;
    this.deleteBtn1.Location = new System.Drawing.Point(487, 41);
    this.deleteBtn1.Name = "deleteBtn1";
    this.deleteBtn1.Size = new System.Drawing.Size(113, 30);
    this.deleteBtn1.TabIndex = 30;
    this.deleteBtn1.Text = "Delete Logs1";
    this.deleteBtn1.UseVisualStyleBackColor = false;
    this.deleteBtn1.Click += new System.EventHandler(this.deleteBtn1_Click);
    //
    // compareBtn
    //
    this.compareBtn.BackColor = System.Drawing.SystemColors.InactiveBorder;
    this.compareBtn.Cursor = System.Windows.Forms.Cursors.Hand;
    this.compareBtn.ForeColor = System.Drawing.Color.Maroon;
    this.compareBtn.Location = new System.Drawing.Point(221, 41);
    this.compareBtn.Name = "compareBtn";
    this.compareBtn.Size = new System.Drawing.Size(125, 30);
    this.compareBtn.TabIndex = 31;
    this.compareBtn.Text = "Compare Lines";
    this.compareBtn.UseVisualStyleBackColor = false;
    this.compareBtn.Click += new System.EventHandler(this.countBtn_Click);
    //
    // magnifPic
    //
    this.magnifPic.BackgroundImage = ((System.Drawing.Image)(resources.GetObject("magnifPic.BackgroundImage")));
    this.magnifPic.BackgroundImageLayout = System.Windows.Forms.ImageLayout.Stretch;
    this.magnifPic.Location = new System.Drawing.Point(157, 33);
    this.magnifPic.Name = "magnifPic";
    this.magnifPic.Size = new System.Drawing.Size(43, 38);
    this.magnifPic.TabIndex = 32;
    this.magnifPic.TabStop = false;
    //
    // filterBtn
    //
    this.filterBtn.BackColor = System.Drawing.SystemColors.InactiveBorder;
    this.filterBtn.Cursor = System.Windows.Forms.Cursors.Hand;
    this.filterBtn.ForeColor = System.Drawing.Color.Maroon;
    this.filterBtn.Location = new System.Drawing.Point(221, 337);
    this.filterBtn.Name = "filterBtn";
    this.filterBtn.Size = new System.Drawing.Size(125, 31);
    this.filterBtn.TabIndex = 33;
    this.filterBtn.Text = "Filter";
    this.filterBtn.UseVisualStyleBackColor = false;
    this.filterBtn.Click += new System.EventHandler(this.filterBtn_Click);
    //
    // loadBtn
    //
    this.loadBtn.BackColor = System.Drawing.SystemColors.InactiveBorder;
    this.loadBtn.Cursor = System.Windows.Forms.Cursors.Hand;
    this.loadBtn.ForeColor = System.Drawing.Color.Maroon;
    this.loadBtn.Location = new System.Drawing.Point(351, 41);
    this.loadBtn.Name = "loadBtn";
    this.loadBtn.Size = new System.Drawing.Size(130, 30);
    this.loadBtn.TabIndex = 34;
    this.loadBtn.Text = "Load Logs File";
    this.loadBtn.UseVisualStyleBackColor = false;
    this.loadBtn.Click += new System.EventHandler(this.openBtn_Click_1);
    //
    // deleteBtn2
    //
    this.deleteBtn2.BackColor = System.Drawing.SystemColors.InactiveBorder;
    this.deleteBtn2.Cursor = System.Windows.Forms.Cursors.Hand;
    this.deleteBtn2.ForeColor = System.Drawing.Color.DarkRed;
    this.deleteBtn2.Location = new System.Drawing.Point(487, 157);
    this.deleteBtn2.Name = "deleteBtn2";
    this.deleteBtn2.Size = new System.Drawing.Size(113, 31);
    this.deleteBtn2.TabIndex = 37;
    this.deleteBtn2.Text = "Delete Logs2";
    this.deleteBtn2.UseVisualStyleBackColor = false;
    this.deleteBtn2.Click += new System.EventHandler(this.deleteBtn2_Click);
    //
    // labelSalford
    //
    this.labelSalford.AutoSize = true;
    this.labelSalford.ForeColor = System.Drawing.Color.Green;
    this.labelSalford.Location = new System.Drawing.Point(218, 9);
    this.labelSalford.Name = "labelSalford";
    this.labelSalford.Size = new System.Drawing.Size(171, 17);
    this.labelSalford.TabIndex = 38;
    this.labelSalford.Text = "University of Salford 2011";
    //
    // textLogs3
    //
    this.textLogs3.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
                | System.Windows.Forms.AnchorStyles.Left)
                | System.Windows.Forms.AnchorStyles.Right)));
    this.textLogs3.BackColor = System.Drawing.SystemColors.MenuText;
    this.textLogs3.Cursor = System.Windows.Forms.Cursors.Arrow;
    this.textLogs3.Font = new System.Drawing.Font("Courier New", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
    this.textLogs3.ForeColor = System.Drawing.SystemColors.Info;
    this.textLogs3.ImeMode = System.Windows.Forms.ImeMode.On;
    this.textLogs3.Location = new System.Drawing.Point(3, 374);
    this.textLogs3.MaxLength = 2147483647;
    this.textLogs3.Multiline = true;
    this.textLogs3.Name = "textLogs3";
    this.textLogs3.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
    this.textLogs3.Size = new System.Drawing.Size(597, 114);
    this.textLogs3.TabIndex = 39;
    //
    // labelLogs3
    //
    this.labelLogs3.AutoSize = true;
    this.labelLogs3.Font = new System.Drawing.Font("Microsoft Sans Serif", 10F, System.Drawing.FontStyle.Bold, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
    this.labelLogs3.Location = new System.Drawing.Point(12, 351);
    this.labelLogs3.Name = "labelLogs3";
    this.labelLogs3.Size = new System.Drawing.Size(177, 17);
    this.labelLogs3.TabIndex = 40;
    this.labelLogs3.Text = "3.Pre-processed Logs+";
    //
    // deleteBtn3
    //
    this.deleteBtn3.BackColor = System.Drawing.SystemColors.InactiveBorder;
    this.deleteBtn3.Cursor = System.Windows.Forms.Cursors.Hand;
    this.deleteBtn3.ForeColor = System.Drawing.Color.DarkRed;
    this.deleteBtn3.Location = new System.Drawing.Point(487, 337);
    this.deleteBtn3.Name = "deleteBtn3";
    this.deleteBtn3.Size = new System.Drawing.Size(113, 31);
    this.deleteBtn3.TabIndex = 41;
    this.deleteBtn3.Text = "Delete Logs3";
    this.deleteBtn3.UseVisualStyleBackColor = false;
    this.deleteBtn3.Click += new System.EventHandler(this.deleteBtn3_Click);
    //
    // FormLogs
    //
    this.AutoScaleBaseSize = new System.Drawing.Size(6, 16);
    this.AutoSizeMode = System.Windows.Forms.AutoSizeMode.GrowAndShrink;
    this.BackColor = System.Drawing.SystemColors.ActiveBorder;
    this.ClientSize = new System.Drawing.Size(604, 500);
    this.Controls.Add(this.deleteBtn3);
    this.Controls.Add(this.labelLogs3);
    this.Controls.Add(this.textLogs3);
    this.Controls.Add(this.labelSalford);
    this.Controls.Add(this.deleteBtn2);
    this.Controls.Add(this.loadBtn);
    this.Controls.Add(this.filterBtn);
    this.Controls.Add(this.magnifPic);
    this.Controls.Add(this.compareBtn);
    this.Controls.Add(this.deleteBtn1);
    this.Controls.Add(this.saveBtn);
    this.Controls.Add(this.textLogs2);
    this.Controls.Add(this.textLogs1);
    this.Controls.Add(this.labelLogs2);
    this.Controls.Add(this.labelLogs1);
    this.Controls.Add(this.cleanBtn);
    this.Font = new System.Drawing.Font("Microsoft Sans Serif", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
    this.MinimumSize = new System.Drawing.Size(500, 450);
    this.Name = "FormLogs";
    // Window title, kept as a single-line literal (trailing space preserved).
    this.Text = "Logs Pre-Processor Tool ";
    this.TopMost = true;
    ((System.ComponentModel.ISupportInitialize)(this.magnifPic)).EndInit();
    this.ResumeLayout(false);
    this.PerformLayout();
}
#endregion
/// <summary>
/// The main entry point for the application: creates the log pre-processor
/// form and hands it to Application.Run.
/// </summary>
[STAThread]
static void Main()
{
    FormLogs mainForm = new FormLogs();
    Application.Run(mainForm);
}
/// <summary>
/// Returns the RegexOptions shared by the static Regex.Split and Regex.Replace
/// calls in this form, so no Regex instance has to be configured for them.
/// </summary>
/// <returns>
/// IgnoreCase | Compiled | IgnorePatternWhitespace:
/// IgnoreCase makes matching case-insensitive;
/// Compiled compiles the expression instead of interpreting it;
/// IgnorePatternWhitespace ignores unescaped white space inside the pattern
/// and treats '#' as the start of a pattern comment.
/// </returns>
/// <remarks>
/// The method reads no instance state, hence static; unqualified calls from
/// inside the class are unaffected.
/// </remarks>
private static RegexOptions getRegexOptions()
{
    return RegexOptions.IgnoreCase
         | RegexOptions.Compiled
         | RegexOptions.IgnorePatternWhitespace;
}
/// <summary>
/// Replaces the contents of textLogs2 with the given strings (for example the
/// result of Regex.Split), writing each one followed by "\r\n".
/// </summary>
/// <param name="array">Strings to display, in order.</param>
private void printCleanArray(string[] array)
{
    // Start from an empty pane so earlier results are not kept.
    textLogs2.Text = "";

    foreach (string piece in array)
    {
        textLogs2.AppendText(piece + "\r\n");
    }
}
/// <summary>
/// Click handler for the Clean button. Splits the text of textLogs1 with the
/// noise pattern, shows the remaining pieces in textLogs2, and then rewrites
/// "forum/" to "forum.html" in textLogs2.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnClean_Click(object sender, System.EventArgs e)
{
    // Regex.Split cuts the input string at each point where the pattern
    // matches; the matched text itself is thrown away and only the pieces
    // in between are returned.
    // NOTE(review): "d" and "D" in this pattern are literal letters, not the
    // \d / \D character classes - confirm that this is what was intended.
    const string noisePattern = "HTTP/*d*.*d*\"*d*d*d*d*d*d*\"*D*]*:*//*D*D*D*.*";

    // "forum/" is rewritten to "forum.html" so that forum visits can be
    // matched by the filter step.
    const string forumPattern = "forum/";
    const string forumReplacement = "forum.html";

    // The static Regex methods take the pattern text directly, so no Regex
    // instances are created just to read their pattern back.
    RegexOptions options = getRegexOptions();

    printCleanArray(Regex.Split(textLogs1.Text, noisePattern, options));

    textLogs2.Text = Regex.Replace(textLogs2.Text, forumPattern, forumReplacement, options);
}
/// <summary>
/// Click handler for the "Save Logs3" button: asks the user for a file name
/// and writes the text of textLogs3 to that file.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void saveBtn_Click(object sender, EventArgs e)
{
    // The dialog is a disposable component; "using" releases it as soon as
    // the handler is done with it.
    using (SaveFileDialog dialog = new SaveFileDialog())
    {
        dialog.Filter = "Text Files (*.txt)|*.txt|All Files (*.*)|*.*";
        dialog.Title = "Save the new Log File";

        if (dialog.ShowDialog() != DialogResult.OK)
        {
            // Cancelled: nothing to write.
            return;
        }

        try
        {
            File.WriteAllText(dialog.FileName, textLogs3.Text);
        }
        catch (IOException ex)
        {
            // Report the failure to the user instead of letting it escape the handler.
            MessageBox.Show("Could not save the log file: " + ex.Message, "Save failed",
                MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
        catch (UnauthorizedAccessException ex)
        {
            MessageBox.Show("Could not save the log file: " + ex.Message, "Save failed",
                MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
}
/// <summary>
/// Click handler for the "Compare Lines" button: counts the lines of textLogs1
/// and textLogs3 and shows both counts in a message box so they can be compared.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void countBtn_Click(object sender, EventArgs e)
{
    // The count for the original logs goes into the caption, the count for
    // the final logs into the message body.
    string caption = "Lines before " + textLogs1.Lines.Length.ToString();
    string message = "Lines now " + textLogs3.Lines.Length.ToString();

    MessageBox.Show(message, caption);
}
// Pattern used by the Filter step: text running up to "html " or "pdf "
// (each followed by a space). Built once and reused, because constructing a
// compiled Regex on every click repeats the compilation work.
private static readonly Regex relevantRequestRegex =
    new Regex(".*html |.*pdf ", RegexOptions.Compiled);

/// <summary>
/// Click handler for the Filter button: finds every match of the filter
/// pattern in textLogs2 and appends each match, followed by "\r\n", to textLogs3.
/// </summary>
/// <remarks>
/// textLogs3 is appended to, not cleared, so repeated clicks accumulate output.
/// </remarks>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void filterBtn_Click(object sender, EventArgs e)
{
    StringBuilder matchedText = new StringBuilder();

    foreach (Match match in relevantRequestRegex.Matches(textLogs2.Text))
    {
        matchedText.Append(match.Value).Append("\r\n");
    }

    // One AppendText call with the combined text produces the same content
    // as appending match by match.
    if (matchedText.Length > 0)
    {
        textLogs3.AppendText(matchedText.ToString());
    }
}
/// <summary>
/// Click handler for the "Load Logs File" button: lets the user pick a file
/// and loads its entire text into textLogs1.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void openBtn_Click_1(object sender, EventArgs e)
{
    // Opens an OpenFileDialog window for choosing the log file to load.
    // The dialog is a disposable component; "using" releases it afterwards.
    using (OpenFileDialog dialog = new OpenFileDialog())
    {
        dialog.Filter = "Text Files (*.txt)|*.txt|All Files (*.*)|*.*";
        dialog.Title = "Open a Log File";

        if (dialog.ShowDialog() != DialogResult.OK)
        {
            // Cancelled: keep whatever textLogs1 currently shows.
            return;
        }

        try
        {
            this.textLogs1.Text = File.ReadAllText(dialog.FileName);
        }
        catch (IOException ex)
        {
            // Report the failure to the user instead of letting it escape the handler.
            MessageBox.Show("Could not open the log file: " + ex.Message, "Open failed",
                MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
        catch (UnauthorizedAccessException ex)
        {
            MessageBox.Show("Could not open the log file: " + ex.Message, "Open failed",
                MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
}
/// <summary>
/// Click handler for the "Delete Logs1" button: deletes the content of the
/// textLogs1 textbox.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void deleteBtn1_Click(object sender, EventArgs e)
{
    textLogs1.Text = string.Empty;
}
/// <summary>
/// Click handler for the "Delete Logs2" button: deletes the content of the
/// textLogs2 textbox.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void deleteBtn2_Click(object sender, EventArgs e)
{
    textLogs2.Text = string.Empty;
}
/// <summary>
/// Click handler for the "Delete Logs3" button: deletes the content of the
/// textLogs3 textbox.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void deleteBtn3_Click(object sender, EventArgs e)
{
    textLogs3.Text = string.Empty;
}
}
}
Nikolas Georgiou