4 回答
TA贡献1803条经验 获得超3个赞
var dict = new Dictionary<string, string>();
try
{
var lines = email.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
int starts = 0, end = 0, length = 0;
while (!lines[starts + 1].StartsWith("-")) starts++;
for (int i = starts + 1; i < lines.Length; i += 3)
{
var mc = Regex.Matches(lines[i], @"(?:^| )-");
foreach (Match m in mc)
{
int start = m.Value.StartsWith(" ") ? m.Index + 1 : m.Index;
end = start;
while (lines[i][end++] == '-' && end < lines[i].Length - 1) ;
length = Math.Min(end - start, lines[i - 1].Length - start);
string key = length > 0 ? lines[i - 1].Substring(start, length).Trim() : "";
end = start;
while (lines[i][end++] == '-' && end < lines[i].Length) ;
length = Math.Min(end - start, lines[i + 1].Length - start);
string value = length > 0 ? lines[i + 1].Substring(start, length).Trim() : "";
dict.Add(key, value);
}
}
}
catch (Exception ex)
{
throw new Exception("Email is not in correct format");
}
使用正则表达式:
var dict = new Dictionary<string, string>();
try
{
var lines = email.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
int starts = 0;
while (!lines[starts + 1].StartsWith("-")) starts++;
for (int i = starts + 1; i < lines.Length; i += 3)
{
var keys = Regex.Matches(lines[i - 1], @"(?:^| )(\w+\s?)+");
var values = Regex.Matches(lines[i + 1], @"(?:^| )(\w+\s?)+");
if (keys.Count == values.Count)
for (int j = 0; j < keys.Count; j++)
dict.Add(keys[j].Value.Trim(), values[j].Value.Trim());
else // remove bug if value of first key in a line has no value
{
if (lines[i + 1].StartsWith(" "))
{
dict.Add(keys[0].Value.Trim(), "");
dict.Add(keys[1].Value.Trim(), values[0].Value.Trim());
}
else
{
dict.Add(keys[0].Value, values[0].Value.Trim());
dict.Add(keys[1].Value.Trim(), "");
}
}
}
}
catch (Exception ex)
{
throw new Exception("Email is not in correct format");
}
TA贡献1818条经验 获得超3个赞
这是一个假设您不需要标题的方法,信息按顺序和强制性出现。这不适用于包含空格或可选字段的数据。
foreach (MailItem mail in publicFolder.Items)
{
MessageBox.Show(mail.Body, "MailItem body");
// Split by line, remove dash lines.
var data = Regex.Split(mail.Body, @"\r?\n|\r")
.Where(l => !l.StartsWith('-'))
.ToList();
// Remove headers
for(var i = data.Count -2; lines >= 0; i -2)
{
data.RemoveAt(i);
}
// now data contains only the info you want in the order it was presented.
// Asuming info doesn't have spaces.
var result = data.SelectMany(d => d.Split(' '));
// WARNING: Missing info will not be present.
// {"GR332230", "0000232323", "GX3472", "1", "3411144"}
}
TA贡献2037条经验 获得超6个赞
这是我的尝试。我不知道电子邮件格式是否可以更改(行、列等)。
除了检查双空格(我的解决方案)之外,我想不出一种分隔列的简单方法。
class Program
{
static void Main(string[] args)
{
var emailBody = GetEmail();
using (var reader = new StringReader(emailBody))
{
var lines = new List<string>();
const int startingRow = 2; // Starting line to read from (start at Marketing ID line)
const int sectionItems = 4; // Header row (ex. Marketing ID & Local Number Line) + Dash Row + Value Row + New Line
// Add all lines to a list
string line = "";
while ((line = reader.ReadLine()) != null)
{
lines.Add(line.Trim()); // Add each line to the list and remove any leading or trailing spaces
}
for (var i = startingRow; i < lines.Count; i += sectionItems)
{
var currentLine = lines[i];
var indexToBeginSeparatingColumns = currentLine.IndexOf(" "); // The first time we see double spaces, we will use as the column delimiter, not the best solution but should work
var header1 = currentLine.Substring(0, indexToBeginSeparatingColumns);
var header2 = currentLine.Substring(indexToBeginSeparatingColumns, currentLine.Length - indexToBeginSeparatingColumns).Trim();
currentLine = lines[i+2]; //Skip dash line
indexToBeginSeparatingColumns = currentLine.IndexOf(" ");
string value1 = "", value2 = "";
if (indexToBeginSeparatingColumns == -1) // Use case of there being no value in the 2nd column, could be better
{
value1 = currentLine.Trim();
}
else
{
value1 = currentLine.Substring(0, indexToBeginSeparatingColumns);
value2 = currentLine.Substring(indexToBeginSeparatingColumns, currentLine.Length - indexToBeginSeparatingColumns).Trim();
}
Console.WriteLine(string.Format("{0},{1},{2},{3}", header1, value1, header2, value2));
}
}
}
static string GetEmail()
{
return @"EMAIL STARTING IN APRIL
Marketing ID Local Number
------------------- ----------------------
GR332230 0000232323
Dispatch Code Logic code
----------------- -------------------
GX3472 1
Destination ID Destination details
----------------- -------------------
3411144";
}
}
输出看起来像这样:
营销 ID,GR332230,本地编号,0000232323 调度代码,GX3472,逻辑代码,1 目的地 ID,3411144,目的地详细信息,
TA贡献1827条经验 获得超7个赞
用 Regex 做这件事不是一个好主意,因为它很容易忘记边缘情况,不容易理解,也不容易调试。很容易陷入正则表达式挂起 CPU 并超时的情况。(我还不能对其他答案发表任何评论。所以,在选择最终解决方案之前,请至少检查我的其他两个案例。)
在您的情况下,以下 Regex 解决方案适用于您提供的示例。但是,还有一些额外的限制:您需要确保非开始或非结束列中没有空值。或者,如果有两列以上且中间的任何一列为空,都会导致该行的名称和值不匹配。
不幸的是,我不能给你一个非正则表达式的解决方案,因为我不知道规范,例如:会有空格吗?会有 TAB 吗?每个字段的字符数是固定的还是灵活的?如果它是灵活的并且可以有空值,那么用什么样的规则来检测哪些列是空的?我假设它们很可能是由列名的长度定义的,并且只有空格作为分隔符。如果是这种情况,有两种方法可以解决它,两次通过 Regex 或编写自己的解析器。如果所有字段的长度都是固定的,那就更简单了:只需要使用子字符串来切割行,然后修剪它们。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
public class Program
{
public class Record{
public string Name {get;set;}
public string Value {get;set;}
}
public static void Main()
{
var regex = new Regex(@"(?<name>((?!-)[\w]+[ ]?)*)(?>(?>[ \t]+)?(?<name>((?!-)[\w]+[ ]?)+)?)+(?:\r\n|\r|\n)(?>(?<splitters>(-+))(?>[ \t]+)?)+(?:\r\n|\r|\n)(?<value>((?!-)[\w]+[ ]?)*)(?>(?>[ \t]+)?(?<value>((?!-)[\w]+[ ]?)+)?)+", RegexOptions.Compiled);
var testingValue =
@"EMAIL STARTING IN APRIL
Marketing ID Local Number
------------------- ----------------------
GR332230 0000232323
Dispatch Code Logic code
----------------- -------------------
GX3472 1
Destination ID Destination details
----------------- -------------------
3411144";
var matches = regex.Matches(testingValue);
var rows = (
from match in matches.OfType<Match>()
let row = (
from grp in match.Groups.OfType<Group>()
select new {grp.Name, Captures = grp.Captures.OfType<Capture>().ToList()}
).ToDictionary(item=>item.Name, item=>item.Captures.OfType<Capture>().ToList())
let names = row.ContainsKey("name")? row["name"] : null
let splitters = row.ContainsKey("splitters")? row["splitters"] : null
let values = row.ContainsKey("value")? row["value"] : null
where names != null && splitters != null &&
names.Count == splitters.Count &&
(values==null || values.Count <= splitters.Count)
select new {Names = names, Values = values}
);
var records = new List<Record>();
foreach(var row in rows)
{
for(int i=0; i< row.Names.Count; i++)
{
records.Add(new Record{Name=row.Names[i].Value, Value=i < row.Values.Count ? row.Values[i].Value : ""});
}
}
foreach(var record in records)
{
Console.WriteLine(record.Name + " = " + record.Value);
}
}
}
输出:
Marketing ID = GR332230
Local Number = 0000232323
Dispatch Code = GX3472
Logic code = 1
Destination ID = 3411144
Destination details =
请注意,这也适用于此类消息:EMAIL STARTING IN APRIL
Marketing ID Local Number
------------------- ----------------------
GR332230 0000232323
Dispatch Code Logic code
----------------- -------------------
GX3472 1
Destination ID Destination details
----------------- -------------------
3411144
输出:
Marketing ID = GR332230
Local Number = 0000232323
Dispatch Code = GX3472
Logic code = 1
Destination ID =
Destination details = 3411144
或这个:
EMAIL STARTING IN APRIL
Marketing ID Local Number
------------------- ----------------------
Dispatch Code Logic code
----------------- -------------------
GX3472 1
Destination ID Destination details
----------------- -------------------
3411144
输出:
Marketing ID =
Local Number =
Dispatch Code = GX3472
Logic code = 1
Destination ID =
Destination details = 3411144
- 4 回答
- 0 关注
- 94 浏览
添加回答
举报