gem5
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
cl_driver.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its contributors
18  * may be used to endorse or promote products derived from this software
19  * without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Author: Anthony Gutierrez
34  */
35 
#include "gpu-compute/cl_driver.hh"

#include <memory>

#include "base/intmath.hh"
#include "cpu/thread_context.hh"
#include "gpu-compute/dispatcher.hh"
#include "gpu-compute/hsa_code.hh"
#include "gpu-compute/hsa_kernel_info.hh"
#include "gpu-compute/hsa_object.hh"
#include "params/ClDriver.hh"
#include "sim/process.hh"
#include "sim/syscall_emul_buf.hh"
49 
50 ClDriver::ClDriver(ClDriverParams *p)
51  : EmulatedDriver(p), hsaCode(0)
52 {
53  for (const auto &codeFile : p->codefile)
54  codeFiles.push_back(&codeFile);
55 
56  maxFuncArgsSize = 0;
57 
58  for (int i = 0; i < codeFiles.size(); ++i) {
60 
61  for (int k = 0; k < obj->numKernels(); ++k) {
62  assert(obj->getKernel(k));
63  kernels.push_back(obj->getKernel(k));
64  kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData);
65  int kern_funcargs_size = kernels.back()->funcarg_size;
66  maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ?
67  kern_funcargs_size : maxFuncArgsSize;
68  }
69  }
70 
71  int name_offs = 0;
72  int code_offs = 0;
73 
74  for (int i = 0; i < kernels.size(); ++i) {
75  kernelInfo.push_back(HsaKernelInfo());
76  HsaCode *k = kernels[i];
77 
79 
80  kernelInfo[i].name_offs = name_offs;
81  kernelInfo[i].code_offs = code_offs;
82 
83  name_offs += k->name().size() + 1;
84  code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
85  }
86 }
87 
88 void
90 {
91  dispatcher = _dispatcher;
93 }
94 
95 int
96 ClDriver::open(Process *p, ThreadContext *tc, int mode, int flags)
97 {
98  std::shared_ptr<DeviceFDEntry> fdp;
99  fdp = std::make_shared<DeviceFDEntry>(this, filename);
100  int tgt_fd = p->fds->allocFD(fdp);
101  return tgt_fd;
102 }
103 
104 int
105 ClDriver::ioctl(Process *process, ThreadContext *tc, unsigned req)
106 {
107  int index = 2;
108  Addr buf_addr = process->getSyscallArg(tc, index);
109 
110  switch (req) {
111  case HSA_GET_SIZES:
112  {
113  TypedBufferArg<HsaDriverSizes> sizes(buf_addr);
114  sizes->num_kernels = kernels.size();
115  sizes->string_table_size = 0;
116  sizes->code_size = 0;
117  sizes->readonly_size = 0;
118 
119  if (kernels.size() > 0) {
120  // all kernels will share the same read-only memory
121  sizes->readonly_size =
123  // check our assumption
124  for (int i = 1; i<kernels.size(); ++i) {
125  assert(sizes->readonly_size ==
127  }
128  }
129 
130  for (int i = 0; i < kernels.size(); ++i) {
131  HsaCode *k = kernels[i];
132  // add one for terminating '\0'
133  sizes->string_table_size += k->name().size() + 1;
134  sizes->code_size +=
135  k->numInsts() * sizeof(TheGpuISA::RawMachInst);
136  }
137 
138  sizes.copyOut(tc->getMemProxy());
139  }
140  break;
141 
142  case HSA_GET_KINFO:
143  {
145  kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size());
146 
147  for (int i = 0; i < kernels.size(); ++i) {
148  HsaKernelInfo *ki = &kinfo[i];
149  ki->name_offs = kernelInfo[i].name_offs;
150  ki->code_offs = kernelInfo[i].code_offs;
151  ki->sRegCount = kernelInfo[i].sRegCount;
152  ki->dRegCount = kernelInfo[i].dRegCount;
153  ki->cRegCount = kernelInfo[i].cRegCount;
154  ki->static_lds_size = kernelInfo[i].static_lds_size;
155  ki->private_mem_size = kernelInfo[i].private_mem_size;
156  ki->spill_mem_size = kernelInfo[i].spill_mem_size;
157  }
158 
159  kinfo.copyOut(tc->getMemProxy());
160  }
161  break;
162 
163  case HSA_GET_STRINGS:
164  {
165  int string_table_size = 0;
166  for (int i = 0; i < kernels.size(); ++i) {
167  HsaCode *k = kernels[i];
168  string_table_size += k->name().size() + 1;
169  }
170 
171  BufferArg buf(buf_addr, string_table_size);
172  char *bufp = (char*)buf.bufferPtr();
173 
174  for (int i = 0; i < kernels.size(); ++i) {
175  HsaCode *k = kernels[i];
176  const char *n = k->name().c_str();
177 
178  // idiomatic string copy
179  while ((*bufp++ = *n++));
180  }
181 
182  assert(bufp - (char *)buf.bufferPtr() == string_table_size);
183 
184  buf.copyOut(tc->getMemProxy());
185  }
186  break;
187 
189  {
190  // we can pick any kernel --- they share the same
191  // readonly segment (this assumption is checked in GET_SIZES)
192  uint64_t size =
193  kernels.back()->getSize(HsaCode::MemorySegment::READONLY);
194  BufferArg data(buf_addr, size);
195  char *datap = (char *)data.bufferPtr();
196  memcpy(datap,
197  kernels.back()->readonly_data,
198  size);
199  data.copyOut(tc->getMemProxy());
200  }
201  break;
202 
203  case HSA_GET_CODE:
204  {
205  // set hsaCode pointer
206  hsaCode = buf_addr;
207  int code_size = 0;
208 
209  for (int i = 0; i < kernels.size(); ++i) {
210  HsaCode *k = kernels[i];
211  code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
212  }
213 
214  TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size);
215  TheGpuISA::RawMachInst *bufp = buf;
216 
217  int buf_idx = 0;
218 
219  for (int i = 0; i < kernels.size(); ++i) {
220  HsaCode *k = kernels[i];
221 
222  for (int j = 0; j < k->numInsts(); ++j) {
223  bufp[buf_idx] = k->insts()->at(j);
224  ++buf_idx;
225  }
226  }
227 
228  buf.copyOut(tc->getMemProxy());
229  }
230  break;
231 
232  case HSA_GET_CU_CNT:
233  {
234  BufferArg buf(buf_addr, sizeof(uint32_t));
235  *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs();
236  buf.copyOut(tc->getMemProxy());
237  }
238  break;
239 
240  case HSA_GET_VSZ:
241  {
242  BufferArg buf(buf_addr, sizeof(uint32_t));
243  *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize();
244  buf.copyOut(tc->getMemProxy());
245  }
246  break;
248  {
249  BufferArg buf(buf_addr, sizeof(uint32_t));
250  *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize();
251  buf.copyOut(tc->getMemProxy());
252  }
253  break;
254 
255  default:
256  fatal("ClDriver: bad ioctl %d\n", req);
257  }
258 
259  return 0;
260 }
261 
262 const char*
264 {
265  assert(hsaCode);
266  uint32_t code_offs = code_ptr - hsaCode;
267 
268  for (int i = 0; i < kernels.size(); ++i) {
269  if (code_offs == kernelInfo[i].code_offs) {
270  return kernels[i]->name().c_str();
271  }
272  }
273 
274  return nullptr;
275 }
276 
277 ClDriver*
278 ClDriverParams::create()
279 {
280  return new ClDriver(this);
281 }
This file defines buffer classes used to handle pointer arguments in emulated syscalls.
std::vector< TheGpuISA::RawMachInst > * insts()
Definition: hsa_code.hh:78
Bitfield< 30, 0 > index
int maxFuncArgsSize
Definition: cl_driver.hh:72
int open(Process *p, ThreadContext *tc, int mode, int flags)
Abstract method, invoked when the user program calls open() on the device driver. ...
Definition: cl_driver.cc:96
Bitfield< 7 > i
Definition: miscregs.hh:1378
uint32_t spill_mem_size
uint32_t string_table_size
static const int HSA_GET_STRINGS
uint32_t code_offs
virtual void generateHsaKernelInfo(HsaKernelInfo *hsaKernelInfo) const =0
int wfSize() const
Definition: dispatcher.cc:391
uint32_t getStaticContextSize() const
Returns the size of the static hardware context of a wavefront.
Definition: dispatcher.cc:403
virtual TheISA::IntReg getSyscallArg(ThreadContext *tc, int &i)=0
void handshake(GpuDispatcher *_dispatcher)
Definition: cl_driver.cc:89
void setFuncargsSize(int funcargs_size)
Definition: dispatcher.cc:397
static const int HSA_GET_KINFO
uint32_t name_offs
uint32_t readonly_size
Bitfield< 4, 0 > mode
Definition: miscregs.hh:1385
uint32_t static_lds_size
TypedBufferArg is a class template; instances of this template represent typed buffers in target user...
ThreadContext is the external interface to all thread state for anything outside of the CPU...
static const int HSA_GET_HW_STATIC_CONTEXT_SIZE
uint64_t hsaCode
Definition: cl_driver.hh:74
Bitfield< 31 > n
Definition: miscregs.hh:1636
const char data[]
Definition: circlebuf.cc:43
uint32_t dRegCount
bool copyOut(SETranslatingPortProxy &memproxy)
copy data out of simulator space (write to target memory)
uint8_t * readonlyData
Definition: hsa_object.hh:67
uint32_t num_kernels
static HsaObject * createHsaObject(const std::string &fname)
Definition: hsa_object.cc:49
Bitfield< 23 > k
Definition: dt_constants.hh:80
int numInsts() const
Definition: hsa_code.hh:77
const std::string & name() const
Definition: hsa_code.hh:76
#define fatal(...)
Definition: misc.hh:163
void * bufferPtr()
Return a pointer to the internal simulator-space buffer.
const char * codeOffToKernelName(uint64_t code_ptr)
Definition: cl_driver.cc:263
uint32_t private_mem_size
virtual SETranslatingPortProxy & getMemProxy()=0
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
static const int HSA_GET_CODE
static const int HSA_GET_READONLY_DATA
Bitfield< 24 > j
Definition: miscregs.hh:1369
std::vector< const std::string * > codeFiles
Definition: cl_driver.hh:63
std::vector< HsaCode * > kernels
Definition: cl_driver.hh:66
static const int HSA_GET_SIZES
uint32_t cRegCount
int size()
Definition: pagetable.hh:146
const std::string & filename
filename for opening this driver (under /dev)
Definition: emul_driver.hh:58
static const int HSA_GET_CU_CNT
static const int HSA_GET_VSZ
virtual int numKernels() const =0
GpuDispatcher * dispatcher
Definition: cl_driver.hh:61
ClDriver(ClDriverParams *p)
Definition: cl_driver.cc:50
BufferArg represents an untyped buffer in target user space that is passed by reference to an (emulat...
EmulatedDriver is an abstract base class for fake SE-mode device drivers.
Definition: emul_driver.hh:52
int ioctl(Process *p, ThreadContext *tc, unsigned req)
Abstract method, invoked when the user program calls ioctl() on the file descriptor returned by a pre...
Definition: cl_driver.cc:105
virtual HsaCode * getKernel(const std::string &name) const =0
uint32_t sRegCount
Bitfield< 0 > p
std::vector< HsaKernelInfo > kernelInfo
Definition: cl_driver.hh:69
std::shared_ptr< FDArray > fds
Definition: process.hh:203
uint32_t RawMachInst
Definition: gpu_types.hh:54

Generated on Fri Jun 9 2017 13:03:47 for gem5 by doxygen 1.8.6