Tuesday, October 4, 2011

Convert HTML Table to DataSet in ASP.NET

/// <summary>
/// Extracts every <c>&lt;table&gt;</c> element found in an HTML string and converts each
/// one into a <see cref="DataTable"/> inside the returned <see cref="DataSet"/>.
/// </summary>
/// <remarks>
/// Parsing is regex-based and case-insensitive. Column names come from the table's
/// <c>&lt;th&gt;</c> cells when it has any; otherwise columns are named
/// "Column 1", "Column 2", and so on. When header cells exist, the first
/// <c>&lt;tr&gt;</c> of the table is treated as the header row and is not emitted as data.
/// Cell values are the raw inner markup of each <c>&lt;td&gt;</c> (no tag stripping or
/// entity decoding is performed). Nested tables are not supported, because the
/// non-greedy table pattern stops at the first <c>&lt;/table&gt;</c> it meets.
/// </remarks>
/// <param name="HTML">The HTML markup to scan for tables.</param>
/// <returns>
/// A <see cref="DataSet"/> holding one <see cref="DataTable"/> per matched table, in
/// document order; empty when no table is found.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="HTML"/> is null.</exception>
private DataSet ConvertHTMLTablesToDataSet(string HTML)
{
    if (HTML == null)
    {
        throw new System.ArgumentNullException("HTML");
    }

    const RegexOptions Options = RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.IgnoreCase;

    // \b stops "<th" from matching "<thead>" and "<tr" from matching "<track>".
    const string TableExpression = @"<table\b[^>]*>(.*?)</table>";
    const string HeaderExpression = @"<th\b[^>]*>(.*?)</th>";
    const string RowExpression = @"<tr\b[^>]*>(.*?)</tr>";
    const string ColumnExpression = @"<td\b[^>]*>(.*?)</td>";

    DataSet ds = new DataSet();

    // Loop through each table element
    foreach (Match table in Regex.Matches(HTML, TableExpression, Options))
    {
        DataTable dt = new DataTable();

        // Use the header cells as column names when the table has any. Detecting them
        // with the same case-insensitive regex keeps "<TH>" and "<th>" consistent.
        MatchCollection headers = Regex.Matches(table.Value, HeaderExpression, Options);
        bool headersExist = headers.Count > 0;
        foreach (Match header in headers)
        {
            AddUniquelyNamedColumn(dt, header.Groups[1].Value);
        }

        // Loop through each row element
        int currentRow = 0;
        foreach (Match row in Regex.Matches(table.Value, RowExpression, Options))
        {
            // Only convert the row if it isn't the header row
            if (!(currentRow == 0 && headersExist))
            {
                MatchCollection cells = Regex.Matches(row.Value, ColumnExpression, Options);

                // Grow the table before creating the row so that header-less tables get
                // their default columns and ragged rows never index past the last column.
                while (dt.Columns.Count < cells.Count)
                {
                    AddUniquelyNamedColumn(dt, "Column " + (dt.Columns.Count + 1));
                }

                DataRow dr = dt.NewRow();
                for (int cell = 0; cell < cells.Count; cell++)
                {
                    dr[cell] = cells[cell].Groups[1].Value;
                }

                // Add the DataRow to the DataTable
                dt.Rows.Add(dr);
            }

            // Increase the current row counter
            currentRow++;
        }

        // Add the DataTable to the DataSet
        ds.Tables.Add(dt);
    }

    return ds;
}

// Adds a column called preferredName, appending " (2)", " (3)", ... when that name is
// already taken so that repeated header text cannot raise DuplicateNameException.
private static void AddUniquelyNamedColumn(DataTable table, string preferredName)
{
    string name = preferredName;
    int suffix = 2;
    while (table.Columns.Contains(name))
    {
        name = preferredName + " (" + suffix + ")";
        suffix++;
    }

    table.Columns.Add(name);
}


1 comment:

  1. Thank you. I needed to generate a DataTable and the class worked perfectly.


Ajax CalendarExtender displaying at wrong position in Chrome

<script type="text/javascript" language="javascript">     function onCalendarShown(sender, args)...