/* Dupmerge - Reclaim disk space by linking identical files together
 *
 * This program reads from standard input a list of files (such
 * as that generated by "find . -print") and discovers which files are
 * identical. Dupmerge unlinks one file of each identical pair and
 * recreates its path name as a link to the other.
 *
 * Non-plain files in the input (directories, pipes, devices, etc.)
 * are ignored. Identical files must be on the same file system to be linked.
 *
 * Dupmerge prefers to keep the older of two identical files, as the older
 * timestamp is more likely to be the correct one given that many
 * copy utilities (e.g., 'cp') do not by default preserve modification
 * times.
 *
 * Dupmerge works by quicksorting a list of path names, with the
 * actual unlinking and relinking steps performed as side effects of
 * the comparison function. The results of the sort are discarded.
 *
 * Command line arguments:
 *   -n   Suppress the actual unlinking and relinking
 *   -q   Operate in quiet mode (otherwise, relinks are displayed on stdout)
 *
 * 12 February 1998 Phil Karn, karn@ka9q.ampr.org
 * Copyright Phil Karn. May be used under the terms of the GNU Public License.
*/
/* NOTE(review): in this copy every #include directive below has lost its
 * header name, so the file cannot compile as shown. Restore the names from
 * a pristine copy of the program rather than guessing them here.
 */
#include
#include
#include
#include
#include

/* Comparison function over path names. As the file header explains, the
 * unlinking and relinking of identical files happens as a side effect of
 * the comparison itself.
 */
int fcmp(const void *a,const void *b);

/* Set to 1 by -n: the unlink() and link() calls are skipped */
int Nodo = 0;
/* Set to 1 by -q: the "ln ..." report lines are not printed */
int Quiet = 0;
/* Incremented whenever the name chosen for removal had a link count of 1 */
int Files_deleted = 0;
/* Running sum of st_blocks for those same single-link files */
int Blocks_reclaimed = 0;

/* Program entry point: parses the -n and -q options, copies the list of
 * path names from standard input into a temporary file while counting the
 * lines, then allocates one pointer per counted line.
 *
 * NOTE(review): the remainder of this function is missing from this copy
 * (see the note at the damaged "for" statement below). Also note that main
 * is declared without a return type.
 */
main(int argc,char *argv[])
{
  char **names,buf[BUFSIZ],*cp;
  int nfiles,i;
  FILE *tmp;

  /* Option parsing: only 'n' and 'q' are acted upon; any other value
   * returned by getopt falls through the switch without effect
   */
  while((i = getopt(argc,argv,"nq")) != EOF){
    switch(i){
    case 'n':
      Nodo = 1;
      break;
    case 'q':
      Quiet = 1;
      break;
    }
  }
  /* Read list of file names into temp file and count */
  /* NOTE(review): the result of tmpfile() is not checked before use */
  tmp = tmpfile();
  nfiles = 0;
  /* The comma operator discards the fgets() result; the loop is controlled
   * by feof() alone. NOTE(review): a final input line that lacks a trailing
   * newline sets the end-of-file indicator during the read, so it is
   * neither counted nor copied.
   */
  while(fgets(buf,sizeof(buf),stdin),!feof(stdin)){
    nfiles++;
    fputs(buf,tmp);
  }
  /* Now that we know how many there are, allocate space and re-read */
  rewind(tmp);
  /* NOTE(review): with empty input nfiles is 0 and this requests zero bytes */
  if((names = (char **)malloc(nfiles*sizeof(char *))) == NULL){
    fprintf(stderr,"%s: Out of memory\n",argv[0]);
    exit(1);
  }
  /* NOTE(review): this copy is damaged here. The text between "for(i=0;i"
   * and "sb.st_size)" has been lost; it covered the rest of main and the
   * beginning of fcmp (its declaration, its local variables and its early
   * checks). Everything from this point on belongs to fcmp. Judging from
   * the uses below, sa and sb appear to be the stat results for filea and
   * fileb - TODO confirm against a pristine copy.
   */
  for(i=0;i sb.st_size)
    return 1;
  /* Same device and same inode number: the two names already refer to
   * one file, so there is nothing to do
   */
  if(sa.st_dev == sb.st_dev && sa.st_ino == sb.st_ino)
    return 0; /* Files are linked */
  /* We now know both files exist, are plain files, are the same size,
   * and are not already linked, so compare their contents
   */
  if((fa = fopen(filea,"r")) == NULL)
    return -1; /* Unreadable files are "less than" */
  if((fb = fopen(fileb,"r")) == NULL){
    /* Close the first stream so it does not leak on this early return */
    fclose(fa);
    return 1;
  }
  /* Byte-by-byte comparison; rval stays 0 only if no differing byte is
   * found before either stream reports EOF
   */
  rval = 0;
  while((c1 = fgetc(fa)) != EOF && (c2 = fgetc(fb)) != EOF){
    if(c1 < c2){
      rval = -1;
      break;
    } else if(c1 > c2){
      rval = 1;
      break;
    }
  }
  fclose(fa);
  fclose(fb);
  if(rval == 0 && sa.st_dev == sb.st_dev){
    /* Files are identical and on the same device, so link them.
     * We prefer to keep the older copy, or if they're the
     * same date, the one with more links
     *
     * NOTE(review): the test below joins the two conditions with ||, so
     * filea is also kept whenever fileb has fewer links, regardless of
     * the modification times; the link count is not only a tie-breaker.
     */
    if(sb.st_mtime > sa.st_mtime || sb.st_nlink < sa.st_nlink){
      /* Keep filea; fileb's name is removed and recreated as a link.
       * The counters are updated even when -n suppresses the operation.
       */
      if(sb.st_nlink == 1){
        Files_deleted++;
        Blocks_reclaimed += sb.st_blocks;
      }
      if(!Nodo && unlink(fileb) == -1){
        fprintf(stderr,"unlink(%s) failed\n",fileb);
        perror("unlink");
        exit(1);
      }
      /* Report the link counts before and after for both names.
       * NOTE(review): %d is used for st_nlink values; confirm that this
       * matches the type of st_nlink on the target platform.
       */
      if(!Quiet)
        printf("ln %s %s: %d->%d, %d->%d\n",filea,fileb,sa.st_nlink,sa.st_nlink+1,
               sb.st_nlink,sb.st_nlink-1);
      /* NOTE(review): the unlink above has already happened, so if this
       * link fails the fileb name is gone when the program exits
       */
      if(!Nodo && link(filea,fileb) == -1){
        fprintf(stderr,"link(%s,%s) failed\n",filea,fileb);
        perror("link");
        exit(1);
      }
    } else {
      /* Mirror image of the branch above: keep fileb, relink filea */
      if(sa.st_nlink == 1){
        Files_deleted++;
        Blocks_reclaimed += sa.st_blocks;
      }
      if(!Nodo && unlink(filea) == -1){
        fprintf(stderr,"unlink(%s) failed\n",filea);
        perror("unlink");
        exit(1);
      }
      if(!Quiet)
        printf("ln %s %s: %d->%d, %d->%d\n",fileb,filea,sb.st_nlink,sb.st_nlink+1,
               sa.st_nlink,sa.st_nlink-1);
      if(!Nodo && link(fileb,filea) == -1){
        fprintf(stderr,"link(%s,%s) failed\n",fileb,filea);
        perror("link");
        exit(1);
      }
    }
  }
  /* Result of the content comparison: negative, zero or positive */
  return rval;
}