summaryrefslogblamecommitdiff
path: root/fs/orangefs/dir.c
blob: f30b6ecacdd19595cca35f8a6711eaedc3c0ef04 (plain) (tree)
1
2
3
4
5
6
7
8
9






                                                            

                            
 
  







                                                                    
   
                                                  
                                                                       

              

                                                           
                        








                                                                    
 



















                                                                           
 
                                   

                                                                          

                                                                       




                                                              
 

                       
 
                                                                 







                                                                       

                                    












                                                                   
                                                        
 
                  











                                                                    
                   






                                                                        
 
                                   
 
                                                  
                                                         
                                    
         




                                                                              

                                     


                   

 

                                                                
   
                                                                       
 

                         



                                                                       



                                                    

                                                                              
                            

                                                            





                                   




                                                          



                               
                                          





                                                               
                                                                  

                                                    
                                                               
 
                                                   


                               



                                                                             
                                       
                                                               

                                                     





                                                                  


                                                   


                                                    
                                                                                            





                                                            
                                                   






                                                                         

                                                 
                                                        
                                                                         


                                                                                





                                                                          
                              





                                                                    

                                                      







                                                                        

         

                                                                                
                                
                                    

                                                                             


                                                             
                                                                     


                                                                



                                        
          
                                                                

                                             






                                                                               
                         








                                                                                
                         

         
          
                                                                       


                                                                
                                              

                             


                                                 
                                                                
                          
                                                           
                   

                                                                        
                                                      
                                                                   

                                              

                                                                  

                                           





                                                               

                                                                               

                              
                           
                                              



                                                    

         
          



                                                                
                                                 
                                                 




                                               
                                                             
                           
                                              

                                                                                    

         
                   






                                                                

                           
                                                                               


                   
                                                                    







                                                                
                                         


                 
                                                                       
 
                                    



                                  

                                                          
                                 


                                        
  
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"

/*
 * decode routine used by kmod to deal with the blob sent from
 * userspace for readdirs. The blob contains zero or more of these
 * sub-blobs:
 *   __u32 - represents length of the character string that follows.
 *   string - between 1 and ORANGEFS_NAME_MAX bytes long.
 *   padding - (if needed) to cause the __u32 plus the string to be
 *             eight byte aligned.
 *   khandle - sizeof(khandle) bytes.
 */
static long decode_dirents(char *ptr, size_t size,
                           struct orangefs_readdir_response_s *readdir)
{
	int i;
	struct orangefs_readdir_response_s *rd =
		(struct orangefs_readdir_response_s *) ptr;
	char *buf = ptr;
	int khandle_size = sizeof(struct orangefs_khandle);
	size_t offset = offsetof(struct orangefs_readdir_response_s,
				dirent_array);
	/* 8 reflects eight byte alignment */
	int smallest_blob = khandle_size + 8;
	__u32 len;
	int aligned_len;
	int sizeof_u32 = sizeof(__u32);
	long ret;

	gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);

	/* size is = offset on empty dirs, > offset on non-empty dirs... */
	if (size < offset) {
		gossip_err("%s: size:%zu: offset:%zu:\n",
			   __func__,
			   size,
			   offset);
		ret = -EINVAL;
		goto out;
	}

        if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) {
		gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
			   __func__,
			   size,
			   readdir->orangefs_dirent_outcount);
		ret = -EINVAL;
		goto out;
	}

	readdir->token = rd->token;
	readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
	readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
					sizeof(*readdir->dirent_array),
					GFP_KERNEL);
	if (readdir->dirent_array == NULL) {
		gossip_err("%s: kcalloc failed.\n", __func__);
		ret = -ENOMEM;
		goto out;
	}

	buf += offset;
	size -= offset;

	for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
		if (size < smallest_blob) {
			gossip_err("%s: size:%zu: smallest_blob:%d:\n",
				   __func__,
				   size,
				   smallest_blob);
			ret = -EINVAL;
			goto free;
		}

		len = *(__u32 *)buf;
		if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
			gossip_err("%s: len:%d:\n", __func__, len);
			ret = -EINVAL;
			goto free;
		}

		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: size:%zu: len:%d:\n",
			     __func__,
			     size,
			     len);

		readdir->dirent_array[i].d_name = buf + sizeof_u32;
		readdir->dirent_array[i].d_length = len;

		/*
		 * Calculate "aligned" length of this string and its
		 * associated __u32 descriptor.
		 */
		aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: aligned_len:%d:\n",
			     __func__,
			     aligned_len);

		/*
		 * The end of the blob should coincide with the end
		 * of the last sub-blob.
		 */
		if (size < aligned_len + khandle_size) {
			gossip_err("%s: ran off the end of the blob.\n",
				   __func__);
			ret = -EINVAL;
			goto free;
		}
		size -= aligned_len + khandle_size;

		buf += aligned_len;

		readdir->dirent_array[i].khandle =
			*(struct orangefs_khandle *) buf;
		buf += khandle_size;
	}
	ret = buf - ptr;
	gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
	goto out;

free:
	kfree(readdir->dirent_array);
	readdir->dirent_array = NULL;

out:
	return ret;
}

/*
 * Read directory entries from an instance of an open directory.
 */
static int orangefs_readdir(struct file *file, struct dir_context *ctx)
{
	int ret = 0;
	int buffer_index;
	/*
	 * ptoken supports Orangefs' distributed directory logic, added
	 * in 2.9.2.
	 */
	__u64 *ptoken = file->private_data;
	__u64 pos = 0;
	ino_t ino = 0;
	struct dentry *dentry = file->f_path.dentry;
	struct orangefs_kernel_op_s *new_op = NULL;
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode);
	int buffer_full = 0;
	struct orangefs_readdir_response_s readdir_response;
	void *dents_buf;
	int i = 0;
	int len = 0;
	ino_t current_ino = 0;
	char *current_entry = NULL;
	long bytes_decoded;

	gossip_debug(GOSSIP_DIR_DEBUG,
		     "%s: ctx->pos:%lld, ptoken = %llu\n",
		     __func__,
		     lld(ctx->pos),
		     llu(*ptoken));

	pos = (__u64) ctx->pos;

	/* are we done? */
	if (pos == ORANGEFS_READDIR_END) {
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "Skipping to termination path\n");
		return 0;
	}

	gossip_debug(GOSSIP_DIR_DEBUG,
		     "orangefs_readdir called on %s (pos=%llu)\n",
		     dentry->d_name.name, llu(pos));

	memset(&readdir_response, 0, sizeof(readdir_response));

	new_op = op_alloc(ORANGEFS_VFS_OP_READDIR);
	if (!new_op)
		return -ENOMEM;

	/*
	 * Only the indices are shared. No memory is actually shared, but the
	 * mechanism is used.
	 */
	new_op->uses_shared_memory = 1;
	new_op->upcall.req.readdir.refn = orangefs_inode->refn;
	new_op->upcall.req.readdir.max_dirent_count =
	    ORANGEFS_MAX_DIRENT_COUNT_READDIR;

	gossip_debug(GOSSIP_DIR_DEBUG,
		     "%s: upcall.req.readdir.refn.khandle: %pU\n",
		     __func__,
		     &new_op->upcall.req.readdir.refn.khandle);

	new_op->upcall.req.readdir.token = *ptoken;

get_new_buffer_index:
	buffer_index = orangefs_readdir_index_get();
	if (buffer_index < 0) {
		ret = buffer_index;
		gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n",
			    ret);
		goto out_free_op;
	}
	new_op->upcall.req.readdir.buf_index = buffer_index;

	ret = service_operation(new_op,
				"orangefs_readdir",
				get_interruptible_flag(dentry->d_inode));

	gossip_debug(GOSSIP_DIR_DEBUG,
		     "Readdir downcall status is %d.  ret:%d\n",
		     new_op->downcall.status,
		     ret);

	orangefs_readdir_index_put(buffer_index);

	if (ret == -EAGAIN && op_state_purged(new_op)) {
		/* Client-core indices are invalid after it restarted. */
		gossip_debug(GOSSIP_DIR_DEBUG,
			"%s: Getting new buffer_index for retry of readdir..\n",
			 __func__);
		goto get_new_buffer_index;
	}

	if (ret == -EIO && op_state_purged(new_op)) {
		gossip_err("%s: Client is down. Aborting readdir call.\n",
			__func__);
		goto out_slot;
	}

	if (ret < 0 || new_op->downcall.status != 0) {
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "Readdir request failed.  Status:%d\n",
			     new_op->downcall.status);
		if (ret >= 0)
			ret = new_op->downcall.status;
		goto out_slot;
	}

	dents_buf = new_op->downcall.trailer_buf;
	if (dents_buf == NULL) {
		gossip_err("Invalid NULL buffer in readdir response\n");
		ret = -ENOMEM;
		goto out_slot;
	}

	bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size,
					&readdir_response);
	if (bytes_decoded < 0) {
		ret = bytes_decoded;
		gossip_err("Could not decode readdir from buffer %d\n", ret);
		goto out_vfree;
	}

	if (bytes_decoded != new_op->downcall.trailer_size) {
		gossip_err("orangefs_readdir: # bytes decoded (%ld) "
			   "!= trailer size (%ld)\n",
			   bytes_decoded,
			   (long)new_op->downcall.trailer_size);
		ret = -EINVAL;
		goto out_destroy_handle;
	}

	/*
	 *  orangefs doesn't actually store dot and dot-dot, but
	 *  we need to have them represented.
	 */
	if (pos == 0) {
		ino = get_ino_from_khandle(dentry->d_inode);
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: calling dir_emit of \".\" with pos = %llu\n",
			     __func__,
			     llu(pos));
		ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
		pos += 1;
	}

	if (pos == 1) {
		ino = get_parent_ino_from_dentry(dentry);
		gossip_debug(GOSSIP_DIR_DEBUG,
			     "%s: calling dir_emit of \"..\" with pos = %llu\n",
			     __func__,
			     llu(pos));
		ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
		pos += 1;
	}

	/*
	 * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around
	 * to prevent "finding" dot and dot-dot on any iteration
	 * other than the first.
	 */
	if (ctx->pos == ORANGEFS_ITERATE_NEXT)
		ctx->pos = 0;

	gossip_debug(GOSSIP_DIR_DEBUG,
		     "%s: dirent_outcount:%d:\n",
		     __func__,
		     readdir_response.orangefs_dirent_outcount);
	for (i = ctx->pos;
	     i < readdir_response.orangefs_dirent_outcount;
	     i++) {
		len = readdir_response.dirent_array[i].d_length;
		current_entry = readdir_response.dirent_array[i].d_name;
		current_ino = orangefs_khandle_to_ino(
			&readdir_response.dirent_array[i].khandle);

		gossip_debug(GOSSIP_DIR_DEBUG,
			     "calling dir_emit for %s with len %d"
			     ", ctx->pos %ld\n",
			     current_entry,
			     len,
			     (unsigned long)ctx->pos);
		/*
		 * type is unknown. We don't return object type
		 * in the dirent_array. This leaves getdents
		 * clueless about type.
		 */
		ret =
		    dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
		if (!ret)
			break;
		ctx->pos++;
		gossip_debug(GOSSIP_DIR_DEBUG,
			      "%s: ctx->pos:%lld\n",
			      __func__,
			      lld(ctx->pos));

	}

	/*
	 * we ran all the way through the last batch, set up for
	 * getting another batch...
	 */
	if (ret) {
		*ptoken = readdir_response.token;
		ctx->pos = ORANGEFS_ITERATE_NEXT;
	}

	/*
	 * Did we hit the end of the directory?
	 */
	if (readdir_response.token == ORANGEFS_READDIR_END &&
	    !buffer_full) {
		gossip_debug(GOSSIP_DIR_DEBUG,
		"End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n");
		ctx->pos = ORANGEFS_READDIR_END;
	}

out_destroy_handle:
	/* kfree(NULL) is safe */
	kfree(readdir_response.dirent_array);
out_vfree:
	gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf);
	vfree(dents_buf);
out_slot:
	orangefs_readdir_index_put(buffer_index);
out_free_op:
	op_release(new_op);
	gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret);
	return ret;
}

static int orangefs_dir_open(struct inode *inode, struct file *file)
{
	__u64 *ptoken;

	file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
	if (!file->private_data)
		return -ENOMEM;

	ptoken = file->private_data;
	*ptoken = ORANGEFS_READDIR_START;
	return 0;
}

static int orangefs_dir_release(struct inode *inode, struct file *file)
{
	orangefs_flush_inode(inode);
	kfree(file->private_data);
	return 0;
}

/** ORANGEFS implementation of VFS directory operations */
const struct file_operations orangefs_dir_operations = {
	.read = generic_read_dir,
	.iterate = orangefs_readdir,
	.open = orangefs_dir_open,
	.release = orangefs_dir_release,
};