Splitting and Merging large files (size in GB) in Java -


suppose,

  • i splitting 2590400 kb (approx 2.5 gb) file in 30 parts.

  • it produce 30 files size of 86347 kb.
    seems correct, 2590400/30 = 86346.66666667

  • now if merge parts (30) again producing file of 3453873 kb file, should 2590410 kb.

can me why difference there? using below code merge , split files.

splitfile.java

import java.io.bufferedoutputstream; import java.io.bufferedreader; import java.io.bufferedwriter; import java.io.file; import java.io.fileinputstream; import java.io.fileoutputstream; import java.io.filewriter; import java.io.ioexception; import java.io.inputstreamreader; import java.io.randomaccessfile;  /**  * @author vishal.zanzrukia  *   */ public class splitfile {      public static final string input_file = "d:\\me\\projects\\input\\file\\path.txt";     public static final int number_of_output_files = 30;     public static final string file_suffix = ".txt";      /**      * split file      *       * @throws exception      */     static void splitfile() throws exception{          file inputfile = new file(input_file + "_splits");         inputfile.mkdir();          randomaccessfile raf = new randomaccessfile(input_file, "r");          long sourcesize = raf.length();         long bytespersplit = sourcesize / number_of_output_files;         long remainingbytes = sourcesize % number_of_output_files;          int maxreadbuffersize = 8 * 1024; // 8kb         (int destix = 1; destix <= number_of_output_files; destix++) {             bufferedoutputstream bw = new bufferedoutputstream(new fileoutputstream(input_file + "_splits\\split." + destix + file_suffix));             if (bytespersplit > maxreadbuffersize) {                 long numreads = bytespersplit / maxreadbuffersize;                 long numremainingread = bytespersplit % maxreadbuffersize;                 (int = 0; < numreads; i++) {                     readwrite(raf, bw, maxreadbuffersize);                 }                 if (numremainingread > 0) {                     readwrite(raf, bw, numremainingread);                 }             } else {                 readwrite(raf, bw, bytespersplit);             }             bw.close();         }         if (remainingbytes > 0) {             bufferedoutputstream bw = new bufferedoutputstream(new fileoutputstream("split." + number_of_output_files + 1));             readwrite(raf, bw, remainingbytes);             bw.close();         }         raf.close();     }      /**      * join file      *       * @throws exception      */     static void joinfiles() throws exception{         int maxreadbuffersize = 8 * 1024;           bufferedoutputstream bw = new bufferedoutputstream(new fileoutputstream(input_file + "_splits\\fulljoin" + file_suffix));         file inputfiledir = new file(input_file + "_splits");         randomaccessfile raf = null;         if(inputfiledir.isdirectory()){             for(file file : inputfiledir.listfiles()){                 raf = new randomaccessfile(file, "r");                 long numreads = raf.length() / maxreadbuffersize;                 long numremainingread = raf.length()  % maxreadbuffersize;                 (int = 0; < numreads; i++) {                     readwrite(raf, bw, maxreadbuffersize);                 }                 if (numremainingread > 0) {                     readwrite(raf, bw, numremainingread);                 }                 raf.close();             }         }         bw.close();     }      public static void mergefiles() {          file[] files = new file[number_of_output_files];         for(int i=1;i<=number_of_output_files;i++){             files[i-1] = new file(input_file + "_splits\\split."+i+file_suffix);         }          string mergedfilepath = input_file + "_splits\\fulljoin" + file_suffix;           file mergedfile = new file(mergedfilepath);          mergefiles(files, mergedfile);     }      public static void mergefiles(file[] files, file mergedfile) {          filewriter fstream = null;         bufferedwriter out = null;         try {             fstream = new filewriter(mergedfile, true);              out = new bufferedwriter(fstream);         } catch (ioexception e1) {             e1.printstacktrace();         }          (file f : files) {             system.out.println("merging: " + f.getname());             fileinputstream fis;             try {                 fis = new fileinputstream(f);                 bufferedreader in = new bufferedreader(new inputstreamreader(fis));                  string aline;                 while ((aline = in.readline()) != null) {                     out.write(aline);                     out.newline();                 }                  in.close();             } catch (ioexception e) {                 e.printstacktrace();             }         }          try {             out.close();         } catch (ioexception e) {             e.printstacktrace();         }      }      public static void main(string[] args) throws exception { //      splitfile();         mergefiles();     }      static void readwrite(randomaccessfile raf, bufferedoutputstream bw, long numbytes) throws ioexception {         byte[] buf = new byte[(int) numbytes];         int val = raf.read(buf);         if (val != -1) {             bw.write(buf);         }     } } 

use joinfiles method: don't try read file line-by-line using reader if want keep was, because line endings may differ platform.

instead read them binary file using inputstream or randomaccessfile , write using outputstream.

the problem in joinfiles method used file.listfiles() makes no guarantees order in files returned.

i combined mergefiles() code joinfiles() make work (remember invoke joinfiles() instead of mergefiles() main method)

static void joinfiles(file[] files) throws exception {     int maxreadbuffersize = 8 * 1024;      bufferedoutputstream bw = new bufferedoutputstream(new fileoutputstream(input_file + "_splits\\fulljoin"             + file_suffix));      randomaccessfile raf = null;     (file file : files) {         raf = new randomaccessfile(file, "r");         long numreads = raf.length() / maxreadbuffersize;         long numremainingread = raf.length() % maxreadbuffersize;         (int = 0; < numreads; i++) {             readwrite(raf, bw, maxreadbuffersize);         }         if (numremainingread > 0) {             readwrite(raf, bw, numremainingread);         }         raf.close();      }     bw.close(); }  public static void joinfiles() throws exception {      file[] files = new file[number_of_output_files];     (int = 1; <= number_of_output_files; i++) {         files[i - 1] = new file(input_file + "_splits\\split." + + file_suffix);     }      joinfiles(files); } 

Comments

Popular posts from this blog

Android : Making Listview full screen -

javascript - Parse JSON from the body of the POST -

javascript - How to Hide Date Menu from Datepicker in yii2 -