在以前公司的时候用MFC container写过一个处理CSV文件的简单的Parser。换到新公司之后公司有条要求,就是能用STL Container的就不用MFC container。于是刚刚用STL重写了一遍,如果大家觉得有用不妨用用。欢迎帮我查查错。
CSV格式指的是用逗号分隔的字符串(Comma-Separated Values)。在读取简单文本数据时应用最为广泛。(数据复杂时一般存储为XML格式。)任何只有一层结构的数据,或者Class,Object都可以很方便的用CSV读取。
在读写CSV时要注意逗号和引号。如果一列数据中已经有一个逗号,就不能再简单的用逗号作分隔符,这一列数据会被自动加上""。同样,如果一列数据中含有引号也会造成混淆,CSV会在每个引号后附加一个引号。
我写的Class CCSVLineParser主要用以下几个函数:GetAt(), SetAt(), GetFullString(), SetFullString(), size()。分别是读、写某一列,读、写整行和得到总列数。我没有用operator overloading重载符号“[]”,而用了GetAt()和SetAt(),是因为我扩展了一点SetAt()的功能。GetAt(int intIndex)返回所指定的列的字符串,如果intIndex越界会throw exception。而SetAt()只有在index小于零时throw exception,如果所指定的列数index过大,CSV会自动扩展至这一列。同时表示整行字符的变量也会更新。
把下面的这个字符串存到.csv文件中,然后用Excel打开,你会发现它是5列,和程序的结果一样。
下面是测试程序:
#include \"stdafx.h\"
#include \"csvlineparser.h\"
// basic_string_erase.cpp
// compile with: /EHsc
#include <string>
#include <iostream>
int _tmain(int argc, _TCHAR* argv[])
{
using namespace std;
string str1 ( \"a,\\"b\\"\\"sassaa\\",\\"aw\\"\\",\\"\\"w\\"\\"jj\\",asd,\\"axsxs\\"\" );
CCSVLineParser csv(str1);
try
{
for (int i = 0; i < (int)csv.size()+1; i++)
{
cout << csv.GetAt(i) << endl;
}
}
catch(CCSVLineParser::Range ex)
{
cout << ex.sText << endl;
}
catch(...)
{
}
cout << str1 << endl;
cout << csv.GetFullString(false) << endl;
cout << csv.GetFullString() << endl;
csv.SetAt(6, string(\"haha\\",\\"haha\"));
cout << csv.GetFullString(false) << endl;
cout << csv.GetFullString() << endl;
}
下面是.h和.cpp
CSVLineParser.h
#pragma once
#include <vector>
#include <string>
using namespace std;
/// One CSV column, stored in two forms: the display value and the "actual"
/// text used for that column inside a CSV line.
class CCSVColumn
{
public:
    /// Builds a column from its display value; defined out of line.
    CCSVColumn(std::string sDisplay);

    /// Stores both forms exactly as given.
    /// Note the parameter order: actual text first, display value second.
    CCSVColumn(std::string sActual, std::string sDisplay)
        : m_sDisplay(sDisplay),
          m_sActual(sActual)
    {
    }

private:
    std::string m_sDisplay;  ///< column value as shown to the user
    std::string m_sActual;   ///< column text as written in the CSV line

    // The line parser reads and writes both members directly.
    friend class CCSVLineParser;
};
/// Parses one line of CSV text into columns and rebuilds the line from them.
class CCSVLineParser
{
public:
    /// Parses sFullString; an empty string yields a parser with no columns.
    CCSVLineParser(std::string sFullString = "");
    ~CCSVLineParser(void);

    /// Number of columns currently held.
    size_t size(void);

    /// Returns column intIndex: the display value by default, or the text as
    /// it appears in the CSV line when bActual is true.
    /// Throws Range when intIndex is negative or not below size().
    std::string GetAt(int intIndex, bool bActual = false);

    /// Replaces all columns by parsing sLine.
    void SetFullString(std::string sLine);

    /// Sets column intIndex to sColumn (a display value). Columns are appended
    /// as needed when intIndex is past the end. Throws Range when intIndex is
    /// negative.
    void SetAt(int intIndex, std::string & sColumn);

    /// Same as above, but also accepts const strings and temporaries, which
    /// cannot bind to the non-const reference of the original overload.
    void SetAt(int intIndex, const std::string & sColumn)
    {
        std::string sCopy(sColumn);
        SetAt(intIndex, sCopy);
    }

    /// Joins all columns with commas, using the CSV text of each column when
    /// bActual is true and the display value otherwise.
    std::string GetFullString(bool bActual = true);

    /// Swaps two existing columns; returns false if either index is out of range.
    bool swapColumn(int intCol1, int intCol2);

    /// Thrown by GetAt() and SetAt() for an invalid column index.
    class Range
    {
    public:
        Range() : sText("Out of range") {}
        std::string sText;
    };

private:
    /// Clears the columns and re-parses sFullString.
    void Read(std::string sFullString);

    std::vector<CCSVColumn> m_vecElements;

    /// Helper of Read(): consumes the doubled quotes that precede the next
    /// delimiter in sFullString.
    static void PreReadForQuote(std::string &sColumn, std::string &sFullString, std::string sDelimiter, int &intQuoteNumber);

    friend class CCSVColumn;
};
CSVLineParser.cpp
#include \"StdAfx.h\"
#include \".\csvlineparser.h\"
//===============================================================================
// Debug-build boilerplate: when _DEBUG is defined, `new` is redefined as
// DEBUG_NEW and THIS_FILE is set to this source file's name.
// NOTE(review): DEBUG_NEW is not defined in this file; presumably it comes
// from MFC through StdAfx.h -- confirm, since this parser is meant to be STL-only.
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
//===============================================================================
// Builds a column from its display value and derives the CSV text for it:
// every quote in the value is doubled, and the whole field is wrapped in
// quotes when it contains a comma or a quote.
//
// The previous version passed an end position where string(str, pos, n)
// expects a length, so quotes were not doubled and text after the first quote
// was duplicated. Quoting fields that contain a quote (not only a comma) keeps
// a value that starts with a quote from being misread as a quoted field.
CCSVColumn::CCSVColumn(string sDisplay) : m_sDisplay(sDisplay)
{
    const bool bNeedsQuoting = sDisplay.find_first_of(",\"") != string::npos;

    m_sActual.reserve(sDisplay.size() + 2);
    if (bNeedsQuoting)
        m_sActual += '"';

    for (string::size_type i = 0; i < sDisplay.size(); ++i)
    {
        const char ch = sDisplay[i];
        if (ch == '"')
            m_sActual += '"';  // escape a quote by doubling it
        m_sActual += ch;
    }

    if (bNeedsQuoting)
        m_sActual += '"';
}
// Constructs the parser by parsing sFullString into columns.
CCSVLineParser::CCSVLineParser(string sFullString)
{
    this->Read(sFullString);
}
// Nothing to release explicitly; the column vector cleans up after itself.
CCSVLineParser::~CCSVLineParser()
{
}
void CCSVLineParser::SetFullString(string strLine)
{
Read(strLine);
}
// Chooses the delimiter that ends the column at the front of sRemaining.
// A column that starts with a quote ends at `",` and has its opening quote
// removed here; any other column (including an empty remainder) ends at `,`.
static string SelectColumnDelimiter(string &sRemaining)
{
    if (sRemaining.empty() || sRemaining[0] != '"')
        return ",";
    sRemaining.erase(0, 1);
    return "\",";
}

// Clears the current columns and splits sFullString into new ones.
//
// For every column two strings are stored: the display value (enclosing quotes
// removed, doubled quotes collapsed) and the "actual" text, which is the
// matching slice of sFullString. An empty line produces no columns.
//
// Changes from the previous version:
//  - the debug leftover `char tmp[200]` + strcpy is gone; it overflowed on any
//    column of 200 characters or more;
//  - a line ending in a comma no longer dereferences begin() of an empty string;
//  - a quoted last column with nothing left to strip no longer throws
//    std::out_of_range from erase().
void CCSVLineParser::Read(string sFullString)
{
    m_vecElements.clear();
    if (sFullString.empty())
        return;

    string sRemaining = sFullString;       // text not yet consumed
    string sColumn;                        // part of the column consumed by PreReadForQuote
    int intQuoteNumber = 0;                // characters in the actual text that are absent from the display value
    string::size_type nStartLocation = 0;  // offset of the current column inside sFullString

    string sDelimiter = SelectColumnDelimiter(sRemaining);
    PreReadForQuote(sColumn, sRemaining, sDelimiter, intQuoteNumber);

    string::size_type nFindLocation;
    while ((nFindLocation = sRemaining.find(sDelimiter)) != string::npos)
    {
        const string sDisplay = sColumn + string(sRemaining, 0, nFindLocation);
        sRemaining.erase(0, nFindLocation + sDelimiter.size());

        // A quoted column also carries its two enclosing quotes.
        if (sDelimiter.size() > 1)
            intQuoteNumber += 2;

        const string::size_type nActualLength = sDisplay.size() + intQuoteNumber;
        m_vecElements.push_back(CCSVColumn(string(sFullString, nStartLocation, nActualLength), sDisplay));

        // Skip the column and the comma that follows it.
        nStartLocation += nActualLength + 1;

        // Look for the next delimiter.
        sDelimiter = SelectColumnDelimiter(sRemaining);
        PreReadForQuote(sColumn, sRemaining, sDelimiter, intQuoteNumber);
    }

    // Grab the last column, which has no trailing delimiter.
    if (sDelimiter.size() > 1)
    {
        // Drop the closing quote, if there is anything left to drop.
        if (!sRemaining.empty())
            sRemaining.erase(sRemaining.size() - 1);
        intQuoteNumber += 2;
    }
    const string sLastDisplay = sColumn + sRemaining;
    m_vecElements.push_back(
        CCSVColumn(string(sFullString, nStartLocation, sLastDisplay.size() + intQuoteNumber), sLastDisplay));
}
// Reports how many columns the parsed line currently has.
size_t CCSVLineParser::size()
{
    const size_t nColumns = m_vecElements.size();
    return nColumns;
}
string CCSVLineParser::GetAt(int intIndex, bool bActual)
{
if (intIndex < 0 || intIndex >= (int)m_vecElements.size())
throw Range();
return bActual ? m_vecElements[intIndex].m_sActual : m_vecElements[intIndex].m_sDisplay;
}
// Consumes every doubled quote ("") that appears before the next delimiter.
//
//   sColumn        - out: the consumed text, with each doubled quote collapsed
//                    to a single quote; cleared on entry
//   sFullString    - in/out: the unparsed text; the consumed part is removed
//                    from its front
//   sDelimiter     - delimiter that ends the current column
//   intQuoteNumber - out: number of doubled quotes collapsed; reset to 0 on entry
//
// Nothing is consumed when sFullString or sDelimiter is empty. Each piece is
// appended straight from sFullString, so the whole remaining string is no
// longer copied on every iteration.
void CCSVLineParser::PreReadForQuote(string &sColumn, string &sFullString, string sDelimiter, int &intQuoteNumber)
{
    static const char szDoubledQuote[] = "\"\"";
    const string::size_type nDoubledQuoteLength = sizeof(szDoubledQuote) / sizeof(char) - 1;

    sColumn.clear();
    intQuoteNumber = 0;
    if (sFullString.empty() || sDelimiter.empty())
        return;

    string::size_type nQuote = sFullString.find(szDoubledQuote);
    string::size_type nDelimiter = sFullString.find(sDelimiter);

    // Continue while a doubled quote exists and lies before the delimiter
    // (or no delimiter is left at all).
    while (nQuote != string::npos && (nDelimiter == string::npos || nQuote < nDelimiter))
    {
        // Keep everything up to and including the first quote of the pair...
        sColumn.append(sFullString, 0, nQuote + 1);
        // ...and drop that text plus the second quote from the input.
        sFullString.erase(0, nQuote + nDoubledQuoteLength);
        ++intQuoteNumber;

        nQuote = sFullString.find(szDoubledQuote);
        nDelimiter = sFullString.find(sDelimiter);
    }
}
// Sets column intIndex to the display value sColumn.
// When intIndex is past the last column, empty columns are appended so that
// the line grows to include it. Throws Range when intIndex is negative.
void CCSVLineParser::SetAt(int intIndex, string & sColumn)
{
    if (intIndex < 0)
        throw Range();

    // Computed in size_t so that intIndex + 1 cannot overflow a signed int.
    const size_t nRequired = static_cast<size_t>(intIndex) + 1;

    // Automatically expand to the requested column.
    if (m_vecElements.size() < nRequired)
        m_vecElements.resize(nRequired, CCSVColumn(""));

    m_vecElements[intIndex] = CCSVColumn(sColumn);
}
string CCSVLineParser::GetFullString(bool bActual)
{
string sFullString;
for (int intColumnCount = 0; intColumnCount < (int)size(); intColumnCount++)
{
if (intColumnCount > 0)
sFullString += ',';
if (bActual)
sFullString += m_vecElements[intColumnCount].m_sActual;
else
sFullString += m_vecElements[intColumnCount].m_sDisplay;
}
return sFullString;
}
// Swaps two columns of the line. Both columns have to exist.
// Returns false (and changes nothing) when either index is negative or not
// below the column count; returns true otherwise, including when both indices
// are the same.
bool CCSVLineParser::swapColumn(int intCol1, int intCol2)
{
    const int intColumns = (int)size();
    if (intCol1 < 0 || intCol2 < 0 || intCol1 >= intColumns || intCol2 >= intColumns)
        return false;

    if (intCol1 != intCol2)
        swap(m_vecElements[intCol1], m_vecElements[intCol2]);
    return true;
}
[此贴子已经被作者于2006-7-15 3:04:38编辑过]