using System;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Xml;
using System.Xml.Schema;
using System.Collections;
using System.Data;
using Orciid.Media.Util;

namespace Orciid.Core
{
    /// <summary>
    /// Import file reader
    /// </summary>
    /// <remarks>
    /// Analyzes and reads in an import data file
    /// </remarks>
	public class ImportFileReader
	{
        /// <summary>
        /// Importable file types
        /// </summary>
        /// <remarks>
        /// Enumerates the file formats this reader knows how to parse.
        /// </remarks>
        public enum FileType
        {
            /// <summary>
            /// Unrecognized file type
            /// </summary>
            /// <remarks>
            /// Used for every file type without a dedicated member in this enumeration
            /// </remarks>
            UNKNOWN = 0,

            /// <summary>
            /// XML document
            /// </summary>
            /// <remarks>
            /// The file contains XML markup
            /// </remarks>
            XML = 1,

            /// <summary>
            /// Comma separated values
            /// </summary>
            /// <remarks>
            /// The file contains records whose values are separated by commas
            /// </remarks>
            CSV = 2,

            /// <summary>
            /// Tab separated values
            /// </summary>
            /// <remarks>
            /// The file contains records whose values are separated by tab characters
            /// </remarks>
            TSV = 3
        }

        /// <summary>
        /// Field value splitting modes
        /// </summary>
        /// <remarks>
        /// Selects whether, and how, a single field value is broken up into
        /// several values while a file is being imported.
        /// </remarks>
        public enum ValueSplitMode
        {
            /// <summary>
            /// No splitting
            /// </summary>
            /// <remarks>
            /// Field values are taken over unchanged
            /// </remarks>
            None = 0,

            /// <summary>
            /// Literal delimiter
            /// </summary>
            /// <remarks>
            /// Field values are split wherever a fixed delimiter string occurs
            /// </remarks>
            Static = 1,

            /// <summary>
            /// Regular expression delimiter
            /// </summary>
            /// <remarks>
            /// Field values are split wherever a regular expression matches
            /// </remarks>
            Regex = 2
        }
		
		// Reader supplied through the TextReader constructor; stays null when the
		// object was created from a file name instead.
		private TextReader stream;
		// Path of the import file, set by the file name constructor; stays null
		// when the object was created from a TextReader.
		private string file;
		// Result of the first GetData() call; later calls return this table
		// instead of reading the input again.
		private DataTable data;
		// Format of the input, chosen by the caller at construction time.
		private FileType filetype;

		// Backing field for SplitMode; values are not split by default.
		private ValueSplitMode splitmode = ValueSplitMode.None;
		// Backing field for SplitAt; null means no delimiter has been configured.
		private string splitat = null;

        /// <summary>
        /// Delimiter used to split field values
        /// </summary>
        /// <remarks>
        /// Interpreted either as a literal string or as a regular expression,
        /// depending on <see cref="SplitMode"/>.  While the split mode is
        /// <see cref="ValueSplitMode.Regex"/>, a non-null value is validated by
        /// constructing a regular expression from it before it is stored, so an
        /// invalid pattern causes an exception and leaves the previous value in place.
        /// </remarks>
        /// <value>
        /// The delimiter string, or <c>null</c> if none has been set
        /// </value>
        public string SplitAt
        {
            get { return splitat; }
            set
            {
                bool patternExpected = (splitmode == ValueSplitMode.Regex);
                if (patternExpected)
                {
                    // throws before the assignment if the pattern is invalid
                    ValidateRegexSplitAt(value);
                }
                splitat = value;
            }
        }

        /// <summary>
        /// Mode used to split field values
        /// </summary>
        /// <remarks>
        /// When switching to <see cref="ValueSplitMode.Regex"/>, the current
        /// <see cref="SplitAt"/> value (if any) is validated as a regular
        /// expression first; an invalid pattern causes an exception and the
        /// split mode remains unchanged.
        /// </remarks>
        /// <value>
        /// The active split mode
        /// </value>
        public ValueSplitMode SplitMode
        {
            get { return splitmode; }
            set
            {
                bool switchingToRegex = (value == ValueSplitMode.Regex);
                if (switchingToRegex)
                {
                    // throws before the assignment if the stored delimiter is not a valid pattern
                    ValidateRegexSplitAt(splitat);
                }
                splitmode = value;
            }
        }
				
		/// <summary>
		/// Checks that a delimiter is usable as a regular expression
		/// </summary>
		/// <remarks>
		/// A <c>null</c> delimiter is accepted without any check.  For any other
		/// value a <see cref="Regex"/> is constructed and discarded; the constructor
		/// throws if the pattern cannot be parsed, and that exception is passed on
		/// to the caller.
		/// </remarks>
		/// <param name="s">The delimiter pattern to check, may be <c>null</c></param>
		private void ValidateRegexSplitAt(string s)
		{
			if (s != null)
			{
				// constructing the expression is the validation; the instance itself is not needed
				new Regex(s);
			}
		}

        /// <summary>
        /// Creates a reader for content supplied by a <see cref="TextReader"/>
        /// </summary>
        /// <remarks>
        /// The reader and the file type are only stored here; nothing is read
        /// until the data is requested.
        /// </remarks>
        /// <param name="s">Reader providing the content to import</param>
        /// <param name="ft">Format of the content</param>
        public ImportFileReader(TextReader s, FileType ft)
        {
            this.filetype = ft;
            this.stream = s;
        }

        /// <summary>
        /// Creates a reader for content stored in a file
        /// </summary>
        /// <remarks>
        /// The file name and the file type are only stored here; the file is
        /// not opened until the data is requested.
        /// </remarks>
        /// <param name="f">Path of the file to import</param>
        /// <param name="ft">Format of the file</param>
        public ImportFileReader(string f, FileType ft)
        {
            this.filetype = ft;
            this.file = f;
        }

        /// <summary>
        /// Creates a field mapping for the imported data
        /// </summary>
        /// <remarks>
        /// Obtains the data table through <see cref="GetData"/> and passes it,
        /// together with the given collection, to a new <see cref="FieldMapping"/>.
        /// </remarks>
        /// <param name="collection">The collection the mapping is created for</param>
        /// <returns>The newly created <see cref="FieldMapping"/></returns>
        public FieldMapping GetMapping(Collection collection)
        {
            DataTable imported = GetData();
            FieldMapping mapping = new FieldMapping(imported, collection);
            return mapping;
        }

        /// <summary>
        /// Returns the imported data
        /// </summary>
        /// <remarks>
        /// The input is read on the first call only; the resulting table is kept
        /// and handed out again on every later call.  If the object was created
        /// with a <see cref="TextReader"/>, that reader is used and left open.
        /// Otherwise the file is opened with the system default encoding and
        /// byte order mark detection, and closed again after reading.
        /// </remarks>
        /// <returns>A <see cref="DataTable"/> holding the data of the input</returns>
        public DataTable GetData()
        {
            // already read earlier: reuse the cached table
            if (data != null)
            {
                return data;
            }

            if (stream == null)
            {
                using (StreamReader reader = new StreamReader(file, Encoding.Default, true))
                {
                    data = ReadFile(reader);
                }
            }
            else
            {
                data = ReadFile(stream);
            }

            return data;
        }

		/// <summary>
		/// Parses the input and applies value splitting
		/// </summary>
		/// <remarks>
		/// The input is handed to the parser matching the configured file type.
		/// If a split mode and a non-empty delimiter are set, every string value
		/// matching the delimiter is split: the first part stays in the original
		/// row, the remaining parts are written to new rows inserted directly
		/// below it.  Parts split from different columns of one row share these
		/// additional rows (the n-th extra part of every column goes to the n-th
		/// additional row).
		/// </remarks>
		/// <param name="stream">Reader positioned at the start of the content</param>
		/// <returns>The parsed table, with split values expanded into rows</returns>
		private DataTable ReadFile(TextReader stream)
		{
			DataTable result;
			if (filetype == FileType.XML)
				result = ParseXml(stream);
			else if (filetype == FileType.CSV)
				result = CsvParser.Parse(stream, true);
			else if (filetype == FileType.TSV)
				result = CsvParser.Parse(stream, true, '\t');
			else
				throw new CoreException("Unknown file type");

			// nothing to do unless splitting is configured and there is data to split
			bool splitConfigured =
				SplitMode != ValueSplitMode.None &&
				SplitAt != null &&
				SplitAt.Length != 0;
			if (!splitConfigured || result == null || result.Rows.Count == 0)
				return result;

			string delimiterPattern;
			switch (SplitMode)
			{
				case ValueSplitMode.Regex:
					delimiterPattern = SplitAt;
					break;
				case ValueSplitMode.Static:
					// a literal delimiter is matched through an escaped pattern
					delimiterPattern = Regex.Escape(SplitAt);
					break;
				default:
					return result;
			}

			Regex splitter = new Regex(delimiterPattern,
				RegexOptions.Compiled | RegexOptions.ExplicitCapture);

			// rows created for the second and later parts of the current row's values
			ArrayList extraRows = new ArrayList();
			int current = 0;
			while (current < result.Rows.Count)
			{
				DataRow source = result.Rows[current];

				for (int column = 0; column < result.Columns.Count; column++)
				{
					string text = source[column] as string;
					if (text == null || text.Length == 0)
						continue;

					string[] parts = splitter.Split(text);
					if (parts.Length <= 1)
						continue;

					source[column] = parts[0];
					for (int extra = 0; extra < parts.Length - 1; extra++)
					{
						// reuse a row created for an earlier column, or add one more
						if (extra >= extraRows.Count)
							extraRows.Add(result.NewRow());
						((DataRow)extraRows[extra])[column] = parts[extra + 1];
					}
				}

				// place the additional rows directly below the row they were split from
				for (int offset = 0; offset < extraRows.Count; offset++)
					result.Rows.InsertAt((DataRow)extraRows[offset], current + 1 + offset);

				// continue with the row following the inserted ones
				current += extraRows.Count + 1;
				extraRows.Clear();
			}

			return result;
		}

		/// <summary>
		/// Parses an XML import document into a table
		/// </summary>
		/// <remarks>
		/// Three document layouts are handled:
		/// a VRA Core 4 document (root element in the namespace
		/// <c>http://www.vraweb.org/vracore4.htm</c>), a document of <c>work</c>
		/// elements containing <c>image</c> elements, and a plain document in which
		/// the name of the first child element of the root identifies the record
		/// elements.  For the first two layouts rows are created per image, combining
		/// the values of the work with the values of the image.
		/// A document whose root has no child elements results in an empty table.
		/// </remarks>
		/// <param name="stream">Reader providing the XML document</param>
		/// <returns>A table with one column per distinct field name found</returns>
		private DataTable ParseXml(TextReader stream)
		{
			XmlDocument doc = new XmlDocument();
			// Import files come from outside: do not fetch external DTDs or entities
			doc.XmlResolver = null;
			doc.Load(stream);

			DataTable table = new DataTable();

			// DocumentElement is the root element even if comments or processing
			// instructions follow it; LastChild would return those instead
			XmlElement root = doc.DocumentElement;
			if (root == null)
				return table;

			// check for VRA Core 4
			if (root.NamespaceURI == "http://www.vraweb.org/vracore4.htm")
			{
				XmlNamespaceManager nsmgr = new XmlNamespaceManager(doc.NameTable);
				nsmgr.AddNamespace("vra", "http://www.vraweb.org/vracore4.htm");
				foreach (XmlNode record in root.SelectNodes("vra:work", nsmgr))
				{
					Hashtable workentries = new Hashtable();
					ParseXmlNode(record, "", workentries, true);
					foreach (XmlNode imagenode in record.SelectNodes("vra:image", nsmgr))
					{
						Hashtable imageentries = new Hashtable();
						ParseXmlNode(imagenode, "image", imageentries, false);
						AddRowsToTable(table, workentries, imageentries);
					}
				}
				return table;
			}

			// first child element indicates records; comments, text and other
			// non-element nodes in front of it are skipped
			string recordnodename = null;
			foreach (XmlNode child in root.ChildNodes)
			{
				if (child.NodeType == XmlNodeType.Element)
				{
					recordnodename = child.Name;
					break;
				}
			}
			if (recordnodename == null)
				return table;

			if (recordnodename == "work" && root.SelectSingleNode("work/image") != null)
			{	// we have a work/image record data file
				foreach (XmlNode record in root.SelectNodes("work"))
				{
					Hashtable workentries = new Hashtable();
					ParseXmlNode(record, "", workentries, true);
					foreach (XmlNode imagenode in record.SelectNodes("image"))
					{
						Hashtable imageentries = new Hashtable();
						ParseXmlNode(imagenode, "image", imageentries, false);
						AddRowsToTable(table, workentries, imageentries);
					}
				}
			}
			else
			{	// we have a plain record data file
				// Children are compared by name instead of being selected with XPath,
				// because an XPath name test fails for prefixed names without a
				// namespace manager and matches nothing under a default namespace
				foreach (XmlNode record in root.ChildNodes)
				{
					if (record.NodeType != XmlNodeType.Element || record.Name != recordnodename)
						continue;
					Hashtable entries = new Hashtable();
					ParseXmlNode(record, "", entries, false);
					AddRowsToTable(table, entries, null);
				}
			}

			return table;
		}

		/// <summary>
		/// Appends the collected values of one record to the table
		/// </summary>
		/// <remarks>
		/// Both hash tables map a field name to an <see cref="ArrayList"/> of values.
		/// A column is added for every field name the table does not contain yet.
		/// Rows are then added one at a time: the n-th row receives the n-th value of
		/// every field that has at least n values, and rows are added until no field
		/// has a value left.  Fields without a value for a row keep the row's default.
		/// </remarks>
		/// <param name="table">The table to add columns and rows to</param>
		/// <param name="workentries">Values of the work or plain record, may be <c>null</c></param>
		/// <param name="imageentries">Values of the image, may be <c>null</c></param>
		private void AddRowsToTable(DataTable table, Hashtable workentries, Hashtable imageentries)
		{
			// work values are always handled before image values
			Hashtable[] sources = new Hashtable[] { workentries, imageentries };

			foreach (Hashtable source in sources)
			{
				if (source == null)
					continue;
				foreach (string column in source.Keys)
				{
					if (!table.Columns.Contains(column))
						table.Columns.Add(column);
				}
			}

			for (int instance = 0; ; instance++)
			{
				DataRow row = table.NewRow();
				bool filled = false;

				foreach (Hashtable source in sources)
				{
					if (source == null)
						continue;
					foreach (DictionaryEntry entry in source)
					{
						ArrayList values = (ArrayList)entry.Value;
						if (instance < values.Count)
						{
							row[(string)entry.Key] = values[instance];
							filled = true;
						}
					}
				}

				// no field had a value for this instance: all values have been written
				if (!filled)
					return;

				table.Rows.Add(row);
			}
		}

		/// <summary>
		/// Collects the values below an XML node
		/// </summary>
		/// <remarks>
		/// Walks the node recursively and appends every non-empty attribute value and
		/// text value to <paramref name="entries"/>.  Keys are built from the element
		/// names on the path to the value, joined with dots and starting at
		/// <paramref name="prefix"/>; attribute values are stored under the key of
		/// their element followed by the attribute name.  Attributes of elements that
		/// contain nothing but a single text or CDATA node are not collected, and
		/// attributes named <c>xmlns</c> are always skipped.  Text directly below the
		/// node is only collected if <paramref name="prefix"/> is not empty.
		/// CDATA sections are treated like ordinary text.
		/// </remarks>
		/// <param name="node">The node to read</param>
		/// <param name="prefix">Key prefix for values of this node, empty for a record root</param>
		/// <param name="entries">Receives the values, field name to list of values</param>
		/// <param name="ignoreImageElements">If true, direct child elements named
		/// <c>image</c> are skipped; the flag is not passed on to deeper levels</param>
		private void ParseXmlNode(XmlNode node, string prefix, Hashtable entries,
			bool ignoreImageElements)
		{
			string keyprefix = prefix + (prefix != "" ? "." : "");

			// Don't collect attributes for simple text elements
			bool simpletext = node.ChildNodes.Count == 1 &&
				(node.FirstChild.NodeType == XmlNodeType.Text ||
				node.FirstChild.NodeType == XmlNodeType.CDATA);

			// Attributes is null for anything but element nodes
			if (!simpletext && node.Attributes != null)
				foreach (XmlAttribute attr in node.Attributes)
					if (attr.Name != "xmlns" && attr.Value != null && attr.Value.Length > 0)
						AppendData(entries, keyprefix + attr.Name, attr.Value);

			foreach (XmlNode child in node.ChildNodes)
			{
				if (child.NodeType == XmlNodeType.Element &&
					(!ignoreImageElements || child.Name != "image"))
					ParseXmlNode(child, keyprefix + child.Name, entries, false);
				else if ((child.NodeType == XmlNodeType.Text ||
					child.NodeType == XmlNodeType.CDATA) && prefix != "")
				{
					// CDATA carries character data just like a text node and must not be lost
					string text = child.InnerText;
					if (text != null && text.Length > 0)
						AppendData(entries, prefix, text);
				}
			}
		}

		/// <summary>
		/// Adds a value to the list stored under a key
		/// </summary>
		/// <remarks>
		/// If the hash table has no entry for the key yet, a new
		/// <see cref="ArrayList"/> is stored under it first.  The value is then
		/// appended to the end of the list, so values keep their insertion order.
		/// </remarks>
		/// <param name="hash">The table mapping keys to lists of values</param>
		/// <param name="key">The key to add the value to</param>
		/// <param name="val">The value to append</param>
		private void AppendData(Hashtable hash, string key, string val)
		{
			ArrayList bucket;
			if (hash.Contains(key))
			{
				bucket = (ArrayList)hash[key];
			}
			else
			{
				bucket = new ArrayList();
				hash[key] = bucket;
			}
			bucket.Add(val);
		}

        /// <summary>
        /// Returns file type based on file extension
        /// </summary>
        /// <remarks>
        /// Compares the extension of the given file name, ignoring case and
        /// independent of the current culture, to the known extensions
        /// <c>.csv</c>, <c>.tsv</c> and <c>.xml</c>.  A <c>null</c> file name
        /// or a file name without extension yields <see cref="FileType.UNKNOWN"/>.
        /// </remarks>
        /// <param name="filename">The file name, may be <c>null</c></param>
        /// <returns>The matching file type, or <see cref="FileType.UNKNOWN"/> if the
        /// extension is unknown</returns>
        public static FileType GetFileTypeByExtension(string filename)
        {
            // Path.GetExtension returns null for a null path, so guard before comparing
            string ext = (filename == null) ? null : Path.GetExtension(filename);
            if (ext == null || ext.Length == 0)
                return FileType.UNKNOWN;

            System.Globalization.CultureInfo invariant =
                System.Globalization.CultureInfo.InvariantCulture;

            if (String.Compare(ext, ".csv", true, invariant) == 0)
                return FileType.CSV;
            if (String.Compare(ext, ".tsv", true, invariant) == 0)
                return FileType.TSV;
            if (String.Compare(ext, ".xml", true, invariant) == 0)
                return FileType.XML;
            return FileType.UNKNOWN;
        }

        /// <summary>
        /// Returns file type based on mime type
        /// </summary>
        /// <remarks>
        /// Compares the given mime type to the known mime types
        /// <c>text/csv</c>, <c>text/comma-separated-values</c>, <c>text/tsv</c>,
        /// <c>text/tab-separated-values</c> and <c>text/xml</c>.  Parameters
        /// following a semicolon (e.g. <c>text/xml;charset=ISO-8859-1</c>) and
        /// surrounding white space are ignored, and the comparison is not case
        /// sensitive.  A <c>null</c> mime type yields <see cref="FileType.UNKNOWN"/>.
        /// </remarks>
        /// <param name="mimetype">The mime type, may be <c>null</c></param>
        /// <returns>The matching file type, or <see cref="FileType.UNKNOWN"/> if the
        /// mime type is unknown</returns>
        public static FileType GetFileTypeByMimeType(string mimetype)
        {
            if (mimetype == null)
                return FileType.UNKNOWN;

            // remove any additional info after a semicolon
            // to handle e.g. "text/xml;charset=ISO-8859-1"
            int semicolon = mimetype.IndexOf(';');
            if (semicolon >= 0)
                mimetype = mimetype.Substring(0, semicolon);

            // tolerate white space around the type, e.g. "text/xml ; charset=utf-8"
            mimetype = mimetype.Trim();

            // media type names are matched without regard to case
            System.Globalization.CultureInfo invariant =
                System.Globalization.CultureInfo.InvariantCulture;

            if (String.Compare(mimetype, "text/csv", true, invariant) == 0 ||
                String.Compare(mimetype, "text/comma-separated-values", true, invariant) == 0)
                return FileType.CSV;
            if (String.Compare(mimetype, "text/tsv", true, invariant) == 0 ||
                String.Compare(mimetype, "text/tab-separated-values", true, invariant) == 0)
                return FileType.TSV;
            if (String.Compare(mimetype, "text/xml", true, invariant) == 0)
                return FileType.XML;
            return FileType.UNKNOWN;
        }
	}
}
