| 网站首页 | 业界新闻 | 小组 | 威客 | 人才 | 下载频道 | 博客 | 代码贴 | 在线编程 | 编程论坛
欢迎加入我们,一同切磋技术
用户名:   
 
密 码:  
共有 3243 人关注过本帖, 1 人收藏
标题:CSV合并的问题
只看楼主 加入收藏
wube
Rank: 12Rank: 12Rank: 12
等 级:贵宾
威 望:23
帖 子:1820
专家分:3681
注 册:2011-3-24
收藏
得分:0 
我先貼上一版的代碼,版主有空幫看看哪裡會導致內存資料會被覆蓋的問題.新版的還沒寫完.
StdAfx.h
程序代码:
// stdafx.h : include file for standard system include files,
//  or project specific include files that are used frequently, but
//      are changed infrequently
//

#if !defined(AFX_STDAFX_H__3652E740_3773_441A_9233_942284587DB6__INCLUDED_)
#define AFX_STDAFX_H__3652E740_3773_441A_9233_942284587DB6__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

#define WIN32_LEAN_AND_MEAN        // Exclude rarely-used stuff from Windows headers

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include <time.h>
#include <io.h>
#include <sys/stat.h>

// Compile-time array dimensions.
// IC x B is the shape of TestResult; Max_Size is used for both dimensions of FileList.
// NOTE(review): A, C and Min_Size are not referenced in the part of the project shown
// here - confirm where they are used before changing or removing them.
// NOTE(review): single-letter macro names (A, B, C) are substituted everywhere this
// header is included, including inside other headers included after it.
#define IC 1024//16384
#define A 2048
#define B 1024
#define C 8
#define Max_Size 256
#define Min_Size 32

// Declarations of the program-wide globals; the matching definitions live in stdafx.cpp.
// FileList holds Max_Size entries of at most Max_Size - 1 characters each (plus terminator).
extern char FileList[Max_Size][Max_Size];
extern unsigned int FileCount;
// IC x B table of float values (1024 x 1024 floats with the current macro values).
extern float TestResult[IC][B];
// NOTE(review): presumably the bounding box of the processed coordinates - confirm against the code that writes them.
extern int My_Max_x;
extern int My_Max_y;
extern int My_Min_x;
extern int My_Min_y;
extern int ErrorCoord;
extern time_t start_tm,finish_tm;

// Fixed descriptive strings, one per analysis option.
// NOTE(review): none of these are referenced in the code shown here; presumably they are
// written into a report header - confirm against the code that uses them.
#define Test_policy "Never merge tests with identical test number if test name not matching"
#define Outlier_removal "None (keep all data)"
#define Statistics_computation "From samples data (if any)- otherwise from summary"
#define Binning_computation "From summary data (if any)- otherwise from samples"
#define Cp_Cpk_computation "Use standard Sigma formula"
#define Mean_drift_formula "Percentage of value drift"
#define Parts_processed "All Data / parts (any Bin)"
#define Data_from_Site "All sites"

// Console text attribute combining the red, green, blue and intensity foreground flags
// from <windows.h>, i.e. bright white text.
#define FOREGROUND_WHITE (FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY)

// TODO: reference additional headers your program requires here

//{{AFX_INSERT_LOCATION}}
// Microsoft Visual C++ will insert additional declarations immediately before the previous line.

#endif // !defined(AFX_STDAFX_H__3652E740_3773_441A_9233_942284587DB6__INCLUDED_)


stdafx.cpp
程序代码:
// stdafx.cpp : source file that includes just the standard includes
//    Analysis.pch will be the pre-compiled header
//    stdafx.obj will contain the pre-compiled type information

#include "stdafx.h"

// TODO: reference any additional headers you need in STDAFX.H
// and not in this file

// Definitions of the globals declared 'extern' in stdafx.h. All start out zeroed.
// NOTE(review): FileList has room for Max_Size names of at most Max_Size - 1 characters,
// and TestResult is IC x B; any writer that indexes past those bounds will overwrite the
// neighbouring globals - check every index against these limits.
char FileList[Max_Size][Max_Size]={0};
unsigned int FileCount=0;
float TestResult[IC][B]={0};
int My_Max_x=0;
int My_Max_y=0;
int My_Min_x=0;
int My_Min_y=0;
time_t start_tm=0;
time_t finish_tm=0;
int ErrorCoord=0;

不要選我當版主
2015-03-11 20:52
wube
Rank: 12Rank: 12Rank: 12
等 级:贵宾
威 望:23
帖 子:1820
专家分:3681
注 册:2011-3-24
收藏
得分:0 
太長貼不上來.
111.rar (10.01 KB)

不要選我當版主
2015-03-11 20:54
wube
Rank: 12Rank: 12Rank: 12
等 级:贵宾
威 望:23
帖 子:1820
专家分:3681
注 册:2011-3-24
收藏
得分:0 
C#也太方便了吧~都不用事先聲明陣列大小就會自動增長~
作弊阿~簡直是作弊阿~
图片附件: 游客没有浏览图片的权限,请 登录注册


已經比VB還要逆天了~

[ 本帖最后由 wube 于 2015-3-11 22:19 编辑 ]

不要選我當版主
2015-03-11 22:18
TonyDeng
Rank: 20Rank: 20Rank: 20Rank: 20Rank: 20
等 级:贵宾
威 望:304
帖 子:25859
专家分:48889
注 册:2011-6-22
收藏
得分:0 
不是作弊啊,那個就是C++標準庫中的vector功能啊,動態數組,不需要用C99的VLA的,VLA也支撐不到那麽大的數組。我這個版本的程序已經沒仔細調整容器的初始大小了,實際執行時會有若干次重置大小的耗時操作,不過速度還算滿意,先不管它了。

授人以渔,不授人以鱼。
2015-03-11 23:58
TonyDeng
Rank: 20Rank: 20Rank: 20Rank: 20Rank: 20
等 级:贵宾
威 望:304
帖 子:25859
专家分:48889
注 册:2011-6-22
收藏
得分:0 
沒下載附件,瞄了一下上面兩樓,感覺用了很多底層的函數,但那正是容易出錯的東西。不是越往底部走效率越高的(比如有人動輒要匯編或內嵌匯編),那絕對是誤區。內存管理,永遠是C的老大難問題,避之唯恐不及,若非不得已,我都不寫和調試那方面的代碼。我的宗旨是,寫程序爲了解決問題,而不是爲了顯擺技術。

編程規範有兩條原則我是深以爲然的:
1.寫直接代碼;
2.優化留到最後,避免過早及不必要的優化。

授人以渔,不授人以鱼。
2015-03-12 00:41
TonyDeng
Rank: 20Rank: 20Rank: 20Rank: 20Rank: 20
等 级:贵宾
威 望:304
帖 子:25859
专家分:48889
注 册:2011-6-22
收藏
得分:0 
根據上面實現的方案,可以用較傳統的C形式重做一個,我寫好貼給你看。當然,我不會用所謂的純C,某些語法會用値得推薦的C++形式,比如就地聲明語法之類,但思想絕對是C的面向過程。

授人以渔,不授人以鱼。
2015-03-12 01:03
TonyDeng
Rank: 20Rank: 20Rank: 20Rank: 20Rank: 20
等 级:贵宾
威 望:304
帖 子:25859
专家分:48889
注 册:2011-6-22
收藏
得分:0 
程序代码:
#include <Windows.h>

#include <conio.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Global constants
const size_t Max_Files = 2000;      // maximum number of files to process (first dimension of FileList::FileName)
const size_t Max_Lines = 10000;     // maximum number of lines in the result document
const size_t Max_Width = 100000;    // maximum number of characters per result line (including the terminator)
const char* File_Type = "csv";      // file extension string, without the leading dot

// Data structures

// List of source file names found in the input directory.
struct FileList
{
    size_t Length;                              // number of entries of FileName currently in use
    char FileName[Max_Files][FILENAME_MAX];     // file names without directory, one NUL-terminated string per entry
};
// In-memory image of the merged output: one fixed-width text buffer per output line.
// With the current constants this structure occupies Max_Lines * Max_Width =
// 1,000,000,000 bytes, so it must be heap-allocated and the allocation can easily fail
// in a 32-bit process.
struct DataRecord
{
    size_t Length;                      // number of entries of Line currently in use
    char Line[Max_Lines][Max_Width];    // merged text of each output line, NUL-terminated
};

// Function prototypes
void ShowTips(const char* programName);     // print the command-line usage help
void Pause(void);                           // prompt and wait for a key press
void Release(void);                         // free the global buffers
size_t GetFilesList(const char* path);      // fill TheFileList from a directory, return the number of entries
void Merger(const char* sourceFileName);    // append one source file's lines to TheResultData

// Global data
// Both pointers start out NULL, are allocated with calloc (TheFileList in GetFilesList,
// TheResultData in main) and are freed by Release.
FileList* TheFileList = NULL;
DataRecord* TheResultData = NULL;

// Program entry point.
//
// Command line: <program> <output name> <source directory> <header line count>
//   argv[1]  output file name without extension ("." + File_Type is appended)
//   argv[2]  directory that is scanned for "*.<File_Type>" source files
//   argv[3]  required by the argument-count check below.
//            NOTE(review): its value is never read in this function - confirm whether
//            the header line count is still meant to be used.
//
// Returns EXIT_SUCCESS after showing the usage help, when no source files are found,
// or after a successful merge; returns EXIT_FAILURE when memory cannot be allocated or
// the output file cannot be created.
int main(int argc, char* argv[])
{
    // Register the cleanup handler first so that every return path below (including the
    // failure paths) releases the global buffers when the process exits.
    atexit(Release);

    if (argc < 4)
    {
        ShowTips(argv[0]);
        Pause();
        return EXIT_SUCCESS;
    }

    if (GetFilesList(argv[2]) == 0)
    {
        // GetFilesList returns 0 both for "nothing found" and for an allocation failure;
        // tell the two apart so the user is not left with a silent exit.
        if (TheFileList == NULL)
        {
            printf_s("內存資源不足\n");
            Pause();
            return EXIT_FAILURE;
        }
        printf_s("在%s中找不到任何%s文檔\n", argv[2], File_Type);
        Pause();
        return EXIT_SUCCESS;
    }

    TheResultData = (DataRecord*)calloc(1, sizeof(DataRecord));
    if (TheResultData == NULL)
    {
        printf_s("內存資源不足\n");
        Pause();
        return EXIT_FAILURE;
    }

    // Merge every source file, in directory-listing order, into the in-memory result.
    char fileName[FILENAME_MAX];
    for (size_t index = 0; index < TheFileList->Length; ++index)
    {
        sprintf_s(fileName, "%s\\%s", argv[2], TheFileList->FileName[index]);
        printf_s("正在處理%s, 請稍候...\n", TheFileList->FileName[index]);
        Merger(fileName);
    }

    // Write the merged lines out in one pass.
    sprintf_s(fileName, "%s.%s", argv[1], File_Type);
    printf_s("\n生成輸出文檔%s\n", fileName);
    FILE* targetFile;
    if (fopen_s(&targetFile, fileName, "wt") != 0)
    {
        // Report the name that was actually opened (with extension), not the raw argument.
        printf_s("建立輸出文件%s失敗\n", fileName);
        Pause();
        return EXIT_FAILURE;
    }
    for (size_t line = 0; line < TheResultData->Length; ++line)
    {
        fprintf_s(targetFile, "%s\n", TheResultData->Line[line]);
    }
    fclose(targetFile);

    Pause();
    return EXIT_SUCCESS;
}

// Extracts the file name without directory and without extension.
//
// The buffer is modified in place: the '.' that starts the extension of the base name
// (if any) is overwritten with a terminator. The returned pointer refers to the base
// name inside `fileName`; it is not a new allocation.
//
// The base name is located BEFORE the extension is stripped, so a dot inside a
// directory name (e.g. "C:\my.dir\prog") is never mistaken for the extension.
// Both '\\' and '/' are accepted as path separators.
char* GetFileNameWithoutExtension(char* fileName)
{
    char* baseName = fileName;

    // Skip past the last backslash, then past any forward slash that follows it.
    char* separator = strrchr(baseName, '\\');
    if (separator != NULL)
    {
        baseName = separator + 1;
    }
    separator = strrchr(baseName, '/');
    if (separator != NULL)
    {
        baseName = separator + 1;
    }

    // Only a dot inside the base name can start the extension.
    char* extension = strrchr(baseName, '.');
    if (extension != NULL)
    {
        *extension = '\0';
    }
    return baseName;
}

// Prints the command-line usage help.
//
// programName: the program path as passed in argv[0]; only its base name (no directory,
//              no extension) is shown in the help text.
void ShowTips(const char* programName)
{
    // Work on a private copy because GetFileNameWithoutExtension edits its argument.
    char fileName[FILENAME_MAX];
    strcpy_s(fileName, sizeof(fileName), programName);

    // Use the returned pointer directly. Copying it back into fileName with strcpy_s
    // would pass overlapping source and destination buffers, which is undefined.
    const char* baseName = GetFileNameWithoutExtension(fileName);

    printf_s("格式: %s 輸出文檔名 \"數據源文檔所在路徑\" 標題行數\n", baseName);
    printf_s("例: %s AAAA \"Data\" 6\n", baseName);
    printf_s("    數據源文檔放在當前目錄的子目錄Data中,標題佔用6行\n");
}

// Shows the "press any key" prompt and blocks until a key is pressed.
void Pause(void)
{
    const char* const prompt = "\n按任意鍵結束程序...";
    printf_s("%s", prompt);
    (void)_getch();     // the key itself is not needed
}

// Releases the global buffers.
//
// Safe to call more than once (for example explicitly and again through atexit):
// each pointer is reset to NULL after it is freed, and free(NULL) is a no-op.
void Release(void)
{
    free(TheFileList);
    TheFileList = NULL;

    free(TheResultData);
    TheResultData = NULL;
}

// 獲取指定目錄中的文件列表
size_t GetFilesList(const char* path)
{
    TheFileList = (FileList*)calloc(1, sizeof(FileList));
    if (TheFileList == NULL)
    {
        return 0;
    }

    char fileName[FILENAME_MAX];
    sprintf_s(fileName, "%s\\*.%s", path, File_Type);
    WIN32_FIND_DATA findFileData;
    HANDLE listFile = FindFirstFileA(fileName, &findFileData);
    if (listFile != INVALID_HANDLE_VALUE)
    {
        do
        {
            strcpy_s(TheFileList->FileName[TheFileList->Length++], FILENAME_MAX, findFileData.cFileName);
        } while (FindNextFileA(listFile, &findFileData));
    }

    return TheFileList->Length;
}

// 合併指定源文檔的數據
void Merger(const char* sourceFileName)
{
    char* buffer = (char*)malloc(Max_Width);
    FILE* sourceFile;
    fopen_s(&sourceFile, sourceFileName, "rt");
    size_t line = 0;
    while (fgets(buffer, Max_Width - 1, sourceFile))
    {
        char* p = strrchr(buffer, '\n');
        *p = '\0';
        strcat_s(TheResultData->Line[line++], Max_Width, buffer);
        if (TheResultData->Length < line)
        {
            ++TheResultData->Length;
        }
    }
    fclose(sourceFile);
    free(buffer);
}

收到的鲜花
  • wube2015-03-12 11:42 送鲜花  10朵   附言:好文章

授人以渔,不授人以鱼。
2015-03-12 04:37
wube
Rank: 12Rank: 12Rank: 12
等 级:贵宾
威 望:23
帖 子:1820
专家分:3681
注 册:2011-3-24
收藏
得分:0 
其實關鍵就是,到內存串接後一次輸出,照這思路,我也能改寫成VB版。
用的指令都還蠻淺顯易懂的,而且看得出不是VC6的,因為指令後有_S,
記得是.NET版修改用以增加程式運作安全性的。

不過寫法感覺大致上跟我寫的方式差不多。哈....

[ 本帖最后由 wube 于 2015-3-12 05:25 编辑 ]

不要選我當版主
2015-03-12 05:23
wmf2014
Rank: 19Rank: 19Rank: 19Rank: 19Rank: 19Rank: 19
等 级:贵宾
威 望:216
帖 子:2039
专家分:11273
注 册:2014-12-6
收藏
得分:0 
各文件行数不同,简单叠加应该不行吧,要补“,”的,否则会错位。比如,最后一个文件是第1000个文件,它比前面的数据多一行,就应该要在前面的数据里补1000个逗号的,我看T版主文件合并部分好像没有这类处理,不知道是怎么解决的。

能编个毛线衣吗?
2015-03-12 07:31
wube
Rank: 12Rank: 12Rank: 12
等 级:贵宾
威 望:23
帖 子:1820
专家分:3681
注 册:2011-3-24
收藏
得分:0 
仿T大的VB6版本...

程序代码:
Option Explicit

' Module-level state shared by the routines in this module.
' TargetFileName / SourceFolder are set by Main from the command line.
' FileNameArray is filled by GetFileList; Tmp holds the merged lines built by MergeCSV
' and written out by OutputData.
Dim TargetFileName As String, SourceFolder As String
Dim FileNameArray() As String, Tmp() As String

' Extension (including the leading dot) of the files that are merged and of the target file.
Private Const FileType = ".csv"

Sub Main()
' Program entry point.
'
' Command line:  <target file>.csv <source folder>
'   - The target file name must end with FileType (compared without regard to case,
'     the same way GetFileList compares extensions).
'   - The command line is split at the FIRST space only, so the source folder may
'     contain spaces and may be wrapped in double quotes. The target file name itself
'     must not contain spaces.
'
' Every validation failure is reported with a message box instead of ending silently.
Dim CmdLine As String
Dim SplitPos As Long

    CmdLine = Trim$(Command)

    If CmdLine <> "" Then
        SplitPos = InStr(CmdLine, " ")
        If SplitPos > 0 Then
            TargetFileName = Trim$(Left$(CmdLine, SplitPos - 1))
            SourceFolder = Trim$(Mid$(CmdLine, SplitPos + 1))

            ' Remove one pair of surrounding double quotes from the folder, if present.
            If Len(SourceFolder) >= 2 Then
                If Left$(SourceFolder, 1) = """" And Right$(SourceFolder, 1) = """" Then
                    SourceFolder = Trim$(Mid$(SourceFolder, 2, Len(SourceFolder) - 2))
                End If
            End If
        Else
            TargetFileName = ""
            SourceFolder = ""
        End If

        If TargetFileName <> "" And SourceFolder <> "" Then

            ' The other routines expect the folder to end with a backslash.
            If Right$(SourceFolder, 1) <> "\" Then SourceFolder = SourceFolder & "\"

            If Not IsFolderExist(SourceFolder) Then
                MsgBox "Source folder not found !" & vbCrLf & SourceFolder
            ElseIf UCase$(Right$(TargetFileName, Len(FileType))) <> UCase$(FileType) Then
                MsgBox "Target file name must end with " & FileType & " !" & vbCrLf & TargetFileName
            ElseIf GetFileList(SourceFolder) > 0 Then
                Call MergeCSV
                Call OutputData(TargetFileName)
                MsgBox "OK"
            Else
                MsgBox "No " & FileType & " file found !" & vbCrLf & SourceFolder
            End If
        Else
            MsgBox "Command Count Error !" & vbCrLf & Command
        End If
    Else
        MsgBox "Command is Empty !"
    End If

    Erase FileNameArray, Tmp

    End

End Sub

Private Function GetFileList(strFolderName As String) As Integer
' Collects the files of strFolderName whose extension equals FileType (compared without
' regard to case) into the module-level FileNameArray and returns how many were stored.
' FileNameArray is left untouched when nothing matches.
Dim Fso As Variant, SrcFolder As Variant, Entry As Variant
Dim ShortName As String, Suffix As String
Dim Found As Integer

    Found = 0
    GetFileList = 0

    Set Fso = CreateObject("Scripting.FileSystemObject")
    Set SrcFolder = Fso.GetFolder(strFolderName)

    For Each Entry In SrcFolder.Files
        ' Name without directory: everything after the last backslash of the path.
        ShortName = Trim(Mid(Entry, InStrRev(Entry, "\") + 1))

        If InStr(ShortName, ".") <> 0 Then
            ' Extension including the dot: everything from the last "." onwards.
            Suffix = Trim$(Mid$(ShortName, InStrRev(ShortName, ".")))

            If UCase$(Suffix) = UCase$(FileType) Then
                ReDim Preserve FileNameArray(Found)
                FileNameArray(Found) = Entry
                Found = Found + 1
            End If
        End If
    Next

    GetFileList = Found

End Function

Private Sub MergeCSV()
' Reads every file listed in FileNameArray and appends line k of each file to Tmp(k),
' so that Tmp ends up holding the side-by-side concatenation of all files.
'
' Tmp grows whenever ANY file has more lines than seen so far. Growing it only while
' reading the first file raises "Subscript out of range" as soon as a later file is
' longer than the first one.
'
' NOTE(review): lines are joined with no separator and shorter files are not padded, so
' columns can shift when the files differ in line count or do not end each line with a
' delimiter - confirm the expected input format.
Dim TempString As String
Dim FileNum As Integer
Dim i As Long, j As Long    ' Long: an Integer counter overflows past 32767 lines

    ReDim Tmp(0)
    For i = 0 To UBound(FileNameArray)
        FileNum = FreeFile: j = 0
        Open FileNameArray(i) For Input As #FileNum
            Do While Not EOF(FileNum)
                Line Input #FileNum, TempString
                If j > UBound(Tmp) Then ReDim Preserve Tmp(j)
                Tmp(j) = Tmp(j) & TempString
                j = j + 1
                DoEvents
            Loop
        Close #FileNum
        DoEvents
    Next i

End Sub

Private Sub OutputData(TF As String)
' Writes every element of the module-level Tmp array to the file TF, one element per
' line, replacing any existing file of that name.
Dim OutHandle As Integer, Row As Integer

    OutHandle = FreeFile
    Open TF For Output As #OutHandle

    Row = 0
    Do While Row <= UBound(Tmp)
        Print #OutHandle, Tmp(Row)
        Row = Row + 1
    Loop

    Close #OutHandle

End Sub

Public Function IsFolderExist(strFolderName As String) As Boolean
' Returns True when strFolderName names an existing folder, as reported by the
' Scripting.FileSystemObject FolderExists method.
    With CreateObject("Scripting.FileSystemObject")
        IsFolderExist = .FolderExists(strFolderName)
    End With
End Function


不要選我當版主
2015-03-12 10:17
快速回复:CSV合并的问题
数据加载中...
 
   



关于我们 | 广告合作 | 编程中国 | 清除Cookies | TOP | 手机版

编程中国 版权所有,并保留所有权利。
Powered by Discuz, Processed in 0.039430 second(s), 9 queries.
Copyright©2004-2024, BCCN.NET, All Rights Reserved