/* splitter.c file for Splitter version 0.1
* Copyright (C) 2010 Alexey Osipov lion-simba@pridelands.ru
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <stdlib.h> //strtol
#include <stdio.h> //printf, stdin, stderr, stdout
#include <ctype.h> //isspace
#include <string.h> //strlen, etc
#include <unistd.h> //getopt
#include <linux/limits.h> //PATH_MAX
#include <errno.h> //errno
#define SPLITTER_VERSION_MAJOR 0
#define SPLITTER_VERSION_MINOR 1
#define SPLITTER_VERSION_REV 0
#define RAW_READ_BUFFER_SIZE 1024
#define STATE_WAITFORSIZE 0
#define STATE_WAITFORSPACE 1
#define STATE_WAITFORWORD 2
#define STATE_COMPAREWORD 3
#define STATE_CHANGEFILE 4
#define MIN(a,b) (((b) < (a)) ? (b) : (a))
//#define _DEBUG
#ifdef _DEBUG
#define D(arg) arg
#else
#define D(arg)
#endif
void print_usage(const char* progname);
/*
 * Build the name of output part `index` as PREFIX<index>SUFFIX in `path`
 * and open that file for writing.
 *
 * Returns the opened stream, or NULL after printing a diagnostic to stderr
 * when the name does not fit into `path_size` bytes or fopen() fails.
 */
static FILE* open_output_part(const char* prefix, int index, const char* suffix,
                              char* path, size_t path_size)
{
    FILE* f_part;
    int name_len;

    //bounded formatting: a long prefix/suffix must not overflow the path buffer
    name_len = snprintf(path, path_size, "%s%d%s", prefix, index, suffix);
    if (name_len < 0 || (size_t)name_len >= path_size)
    {
        fprintf(stderr, "Output file name for part %d is too long.\n", index);
        return NULL;
    }

    f_part = fopen(path, "w");
    if (!f_part)
    {
        fprintf(stderr, "Open output file \"%s\" for write failed: %s\n", path, strerror(errno));
    }
    return f_part;
}

/*
 * Entry point: copy INPUT into numbered part files PREFIX<n>SUFFIX.
 *
 * Each part receives at least SIZE bytes; after that the input is scanned
 * word by word (words are separated by isspace() characters) and the part
 * is closed right after the first whole word equal to WORD, together with
 * the single whitespace character that follows it. The next part is opened
 * immediately, so the last part may be empty when the input ends right
 * after WORD.
 *
 * Exit codes:
 *   0 - success
 *   1 - bad command line (usage is printed)
 *   2 - input file cannot be opened
 *   3 - output file cannot be opened (or its name is too long)
 *   4 - internal error (unknown state)
 *   5 - read error on the input
 *   6 - write error on an output part
 */
int main(int argc, char** argv)
{
    //empty string
    char empty[] = "";
    //arguments (strings)
    char* s_word = NULL;
    char* s_size = NULL;
    char* s_input = NULL;
    char* s_prefix = empty;
    char* s_suffix = empty;
    //arguments (integers)
    long l_size = 0;
    //files
    FILE* f_input = NULL;
    FILE* f_output = NULL;
    //helpers
    size_t i;
    int opt;
    int rc = 0; //process exit code
    int close_failed;
    char* inval_pos = NULL;
    char raw_read_buffer[RAW_READ_BUFFER_SIZE];
    size_t readed = 0; //bytes currently held in the buffer
    size_t to_write = 0; //bytes the current state wants to flush
    size_t writed = 0; //bytes actually written
    int output_file_index = 1;
    long left_to_write = 0; //same type as l_size, so large sizes are not truncated
    char s_output[PATH_MAX];
    int state = STATE_WAITFORSIZE;
    const char* s_word_ptr = NULL;

    while ((opt = getopt(argc, argv, "s:w:i:o:O:")) != -1)
    {
        switch (opt)
        {
        case 'w':
            s_word = optarg;
            break;
        case 's':
            s_size = optarg;
            break;
        case 'i':
            s_input = optarg;
            break;
        case 'o':
            s_prefix = optarg;
            break;
        case 'O':
            s_suffix = optarg;
            break;
        default:
            print_usage(argv[0]);
            return 1;
        }
    }

    //parameters checking...
    if (!s_size || !s_word || !s_input)
    {
        print_usage(argv[0]);
        return 1;
    }

    //SIZE must be a complete, in-range, positive decimal number
    errno = 0;
    l_size = strtol(s_size, &inval_pos, 10);
    if (errno == ERANGE || inval_pos == s_size || *inval_pos != '\0' || l_size <= 0)
    {
        print_usage(argv[0]);
        return 1;
    }

    if (s_word[0] == '\0')
    {
        print_usage(argv[0]);
        return 1;
    }

    //opening input file; only the exact name "-" means stdin
    if (strcmp(s_input, "-") == 0)
    {
        f_input = stdin;
    }
    else
    {
        f_input = fopen(s_input, "r");
        if (!f_input)
        {
            fprintf(stderr, "Open input file \"%s\" for read failed: %s\n", s_input, strerror(errno));
            return 2;
        }
    }

    left_to_write = l_size;
    f_output = open_output_part(s_prefix, output_file_index, s_suffix, s_output, sizeof s_output);
    if (!f_output)
    {
        rc = 3;
        goto cleanup;
    }

    D(fprintf(stderr, "Files opened. Ready to go...\n"));

    //reading file by 1kb each time; fread returns 0 on both EOF and error,
    //which are told apart with ferror() after the loop
    while ((readed = fread(raw_read_buffer, 1, RAW_READ_BUFFER_SIZE, f_input)) > 0)
    {
        D(fprintf(stderr, "Readed %zu bytes.\n", readed));

        while (readed > 0)
        {
            D(fprintf(stderr, "State = %d\n", state));
            D(fprintf(stderr, "Buffer: "));
            D(fwrite(raw_read_buffer, 1, readed, stderr));
            D(fprintf(stderr, "\n"));

            //every state decides how many leading bytes of the buffer go to
            //the current part (to_write) and what the next state is
            switch (state)
            {
            case STATE_WAITFORSPACE:
                //skip to first space; the space itself is left in the buffer
                for (i = 0; i < readed; i++)
                {
                    //ctype functions need an unsigned char value
                    if (isspace((unsigned char)raw_read_buffer[i]))
                        break;
                }
                to_write = i;
                if (i < readed)
                {
                    state = STATE_WAITFORWORD;
                }
                break;

            case STATE_WAITFORWORD:
                //skip to first word; the word itself is left in the buffer
                for (i = 0; i < readed; i++)
                {
                    if (!isspace((unsigned char)raw_read_buffer[i]))
                        break;
                }
                to_write = i;
                if (i < readed)
                {
                    s_word_ptr = s_word;
                    state = STATE_COMPAREWORD;
                }
                break;

            case STATE_COMPAREWORD:
                for (i = 0; i < readed; i++)
                {
                    if (isspace((unsigned char)raw_read_buffer[i]))
                    {
                        if (*s_word_ptr == '\0')
                        {
                            //found: whole WORD matched and the word ended
                            D(fprintf(stderr, "Compare: found!\n"));
                            state = STATE_CHANGEFILE;
                        }
                        else
                        {
                            //not found: the candidate is a proper prefix of WORD.
                            //The space is consumed below, so the next thing to
                            //look for is the start of the following word.
                            D(fprintf(stderr, "Compare: not found: end of word.\n"));
                            state = STATE_WAITFORWORD;
                        }
                        break;
                    }
                    //the explicit end-of-WORD test keeps a NUL byte in the
                    //input from matching the terminator and running past it
                    if (*s_word_ptr == '\0' || raw_read_buffer[i] != *s_word_ptr)
                    {
                        //not found
                        D(fprintf(stderr, "Compare: not found: wrong word.\n"));
                        state = STATE_WAITFORSPACE;
                        break;
                    }
                    s_word_ptr++;
                }
                //flush everything examined so far, including the deciding
                //character; if the buffer ran out, the comparison resumes
                //with the next chunk
                to_write = (i < readed) ? i + 1 : readed;
                break;

            case STATE_WAITFORSIZE:
                //readed never exceeds the buffer size, so the cast is safe
                to_write = (size_t)MIN(left_to_write, (long)readed);
                //decrement size, which we need
                left_to_write -= (long)to_write;
                if (left_to_write <= 0)
                {
                    //search forward for specified word
                    state = STATE_WAITFORSPACE;
                }
                break;

            default:
                fprintf(stderr, "Unknown state: %d", state);
                rc = 4;
                goto cleanup;
            }

            writed = fwrite(raw_read_buffer, 1, to_write, f_output);
            if (writed != to_write)
            {
                //a short write would otherwise stall this loop forever
                fprintf(stderr, "Write to output file \"%s\" failed: %s\n", s_output, strerror(errno));
                rc = 6;
                goto cleanup;
            }
            D(fprintf(stderr, "Writed %zu bytes.\n", writed));

            //move buffer
            memmove(raw_read_buffer, raw_read_buffer + writed, readed - writed);
            readed -= writed;

            //open new output file if needed
            if (state == STATE_CHANGEFILE)
            {
                //fclose flushes buffered data, so its failure is a write error
                close_failed = (fclose(f_output) != 0);
                f_output = NULL;
                if (close_failed)
                {
                    fprintf(stderr, "Write to output file \"%s\" failed: %s\n", s_output, strerror(errno));
                    rc = 6;
                    goto cleanup;
                }

                output_file_index++;
                left_to_write = l_size;
                f_output = open_output_part(s_prefix, output_file_index, s_suffix, s_output, sizeof s_output);
                if (!f_output)
                {
                    rc = 3;
                    goto cleanup;
                }
                state = STATE_WAITFORSIZE;
            }
        }
    }

    if (ferror(f_input))
    {
        fprintf(stderr, "Read from input file \"%s\" failed: %s\n", s_input, strerror(errno));
        rc = 5;
    }

cleanup:
    //close output file
    if (f_output && fclose(f_output) != 0 && rc == 0)
    {
        fprintf(stderr, "Write to output file \"%s\" failed: %s\n", s_output, strerror(errno));
        rc = 6;
    }
    //close input file
    if (f_input && f_input != stdin)
        fclose(f_input);
    return rc;
}
/*
 * Print the program banner (with version numbers) and the command-line
 * synopsis to stderr.
 *
 * progname - name to show in the synopsis line (callers pass argv[0]).
 */
void print_usage(const char* progname)
{
    //one literal per output line; the compiler joins adjacent literals
    fprintf(stderr,
            "Splitter v%d.%d.%d\n"
            "Basic usage is:\n"
            "\n"
            "%s -s SIZE -w WORD -i INPUT [-o PREFIX] [-O SUFFIX]\n"
            "\n"
            "This command split text file INPUT into several files, which are:\n"
            "- have size >~ SIZE bytes;\n"
            "- end by WORD word;\n"
            "- have names like PREFIXnSUFFIX, where `n` is a file part number,\n"
            "starting from 1.\n"
            "\n"
            "If no PREFIX and/or SUFFIX given, only file part number is used.\n"
            "INPUT can by `-` to read from stdin.\n",
            SPLITTER_VERSION_MAJOR,
            SPLITTER_VERSION_MINOR,
            SPLITTER_VERSION_REV,
            progname);
}
// This code is released with the permission of the customer (sers).